all repos — searchix @ f38ccb5ec8149072c93a6c3173da06ba1d724c4c

Search engine for NixOS, nix-darwin, home-manager and NUR users

feat: make index batch size configurable
Alan Pearce alan@alanpearce.eu
Thu, 27 Mar 2025 17:03:10 +0100
commit

f38ccb5ec8149072c93a6c3173da06ba1d724c4c

parent

75c2afdf214b0ddc57efe8e621eb172506c0b0f7

M cmd/searchix-web/main.go
@@ -77,6 +77,7 @@ cfg.DataPath, 		*replace,
 		&index.Options{
 			LowMemory: cfg.Importer.LowMemory,
+			BatchSize: cfg.Importer.BatchSize,
 			Logger:    logger.Named("index"),
 		},
 	)
M defaults.toml
@@ -64,6 +64,8 @@ # Settings for the import job [Importer]
 # Use less memory at the expense of import performance
 LowMemory = false
+# Number of items to process in each batch (affects memory usage).
+BatchSize = 10000
 # Abort fetch and import process for all jobs if it takes longer than this value.
 Timeout = '30m0s'
 # Time of day (UTC) to run fetch/import process
M internal/config/default.go
@@ -49,6 +49,7 @@ LogRequests: true, 	},
 	Importer: &Importer{
 		LowMemory: false,
+		BatchSize: 10_000,
 		Timeout:   Duration{30 * time.Minute},
 		UpdateAt:  mustLocalTime("03:00:00"),
 		Sources: map[string]*Source{
M internal/config/structs.go
@@ -31,6 +31,7 @@ type Importer struct {
 	Sources   map[string]*Source
 	LowMemory bool      `comment:"Use less memory at the expense of import performance"`
+	BatchSize int       `comment:"Number of items to process in each batch (affects memory usage)."`
 	Timeout   Duration  `comment:"Abort fetch and import process for all jobs if it takes longer than this value."`
 	UpdateAt  LocalTime `comment:"Time of day (UTC) to run fetch/import process"`
 }
M internal/importer/main_test.go
@@ -16,6 +16,7 @@ tmp := b.TempDir() 	logger := log.Configure(false)
 	_, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{
 		LowMemory: true,
+		BatchSize: cfg.Importer.BatchSize,
 		Logger:    logger.Named("index"),
 	})
 	if err != nil {
M internal/index/indexer.go
@@ -11,6 +11,7 @@ "path" 	"path/filepath"
 	"slices"
 
+	"go.alanpearce.eu/searchix/internal/config"
 	"go.alanpearce.eu/searchix/internal/file"
 	"go.alanpearce.eu/searchix/internal/nix"
 	"go.alanpearce.eu/x/log"
@@ -34,13 +35,15 @@ ) 
 type Options struct {
 	LowMemory bool
+	BatchSize int
 	Logger    *log.Logger
 }
 
 type WriteIndex struct {
-	index bleve.Index
-	log   *log.Logger
-	Meta  *Meta
+	batchSize int
+	index     bleve.Index
+	log       *log.Logger
+	Meta      *Meta
 }
 
 type BatchError struct {
@@ -50,8 +53,6 @@ func (e *BatchError) Error() string {
 	return e.E.Error()
 }
-
-var batchSize = 10_000
 
 func createIndexMapping() (mapping.IndexMapping, errors.E) {
 	indexMapping := bleve.NewIndexMapping()
@@ -268,8 +269,12 @@ return nil, nil, exists, err 		}
 	}
 
-	if options.LowMemory {
-		batchSize = 1_000
+	if options.BatchSize == 0 {
+		options.BatchSize = config.DefaultConfig.Importer.BatchSize
+	}
+
+	if options.LowMemory && options.BatchSize == config.DefaultConfig.Importer.BatchSize {
+		options.BatchSize = 1_000
 	}
 
 	return &ReadIndex{
@@ -278,9 +283,10 @@ log:   options.Logger, 			meta:  meta,
 		},
 		&WriteIndex{
-			index: idx,
-			log:   options.Logger,
-			Meta:  meta,
+			index:     idx,
+			batchSize: options.BatchSize,
+			log:       options.Logger,
+			Meta:      meta,
 		},
 		exists,
 		nil
@@ -337,7 +343,7 @@ 				continue
 			}
 
-			if k++; k%batchSize == 0 {
+			if k++; k%i.batchSize == 0 {
 				err = i.Flush(batch)
 				if err != nil {
 					errs <- err
@@ -405,7 +411,7 @@ batch := i.index.NewBatch() 	var k int
 	for _, hit := range results.Hits {
 		batch.Delete(hit.ID)
-		if k++; k%batchSize == 0 {
+		if k++; k%i.batchSize == 0 {
 			err := i.Flush(batch)
 			if err != nil {
 				return err
M internal/index/search_test.go
@@ -22,6 +22,7 @@ cfg := config.DefaultConfig 
 	read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{
 		Logger:    log.Named("index"),
+		BatchSize: cfg.Importer.BatchSize,
 		LowMemory: false,
 	})
 	defer read.Close()