feat: make index batch size configurable
Alan Pearce alan@alanpearce.eu
Thu, 27 Mar 2025 17:03:10 +0100
7 files changed, 25 insertions(+), 12 deletions(-)
M cmd/searchix-web/main.go → cmd/searchix-web/main.go
@@ -77,6 +77,7 @@ cfg.DataPath, *replace, &index.Options{ LowMemory: cfg.Importer.LowMemory, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }, )
M defaults.toml → defaults.toml
@@ -64,6 +64,8 @@ # Settings for the import job [Importer] # Use less memory at the expense of import performance LowMemory = false +# Number of items to process in each batch (affects memory usage). +BatchSize = 10000 # Abort fetch and import process for all jobs if it takes longer than this value. Timeout = '30m0s' # Time of day (UTC) to run fetch/import process
M internal/config/default.go → internal/config/default.go
@@ -49,6 +49,7 @@ LogRequests: true, }, Importer: &Importer{ LowMemory: false, + BatchSize: 10_000, Timeout: Duration{30 * time.Minute}, UpdateAt: mustLocalTime("03:00:00"), Sources: map[string]*Source{
M internal/config/structs.go → internal/config/structs.go
@@ -31,6 +31,7 @@ type Importer struct { Sources map[string]*Source LowMemory bool `comment:"Use less memory at the expense of import performance"` + BatchSize int `comment:"Number of items to process in each batch (affects memory usage)."` Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` UpdateAt LocalTime `comment:"Time of day (UTC) to run fetch/import process"` }
M internal/importer/main_test.go → internal/importer/main_test.go
@@ -16,6 +16,7 @@ tmp := b.TempDir() logger := log.Configure(false) _, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{ LowMemory: true, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }) if err != nil {
M internal/index/indexer.go → internal/index/indexer.go
@@ -11,6 +11,7 @@ "path" "path/filepath" "slices" + "go.alanpearce.eu/searchix/internal/config" "go.alanpearce.eu/searchix/internal/file" "go.alanpearce.eu/searchix/internal/nix" "go.alanpearce.eu/x/log" @@ -34,13 +35,15 @@ ) type Options struct { LowMemory bool + BatchSize int Logger *log.Logger } type WriteIndex struct { - index bleve.Index - log *log.Logger - Meta *Meta + batchSize int + index bleve.Index + log *log.Logger + Meta *Meta } type BatchError struct { @@ -50,8 +53,6 @@ func (e *BatchError) Error() string { return e.E.Error() } - -var batchSize = 10_000 func createIndexMapping() (mapping.IndexMapping, errors.E) { indexMapping := bleve.NewIndexMapping() @@ -268,8 +269,12 @@ return nil, nil, exists, err } } - if options.LowMemory { - batchSize = 1_000 + if options.BatchSize == 0 { + options.BatchSize = config.DefaultConfig.Importer.BatchSize + } + + if options.LowMemory && options.BatchSize == config.DefaultConfig.Importer.BatchSize { + options.BatchSize = 1_000 } return &ReadIndex{ @@ -278,9 +283,10 @@ log: options.Logger, meta: meta, }, &WriteIndex{ - index: idx, - log: options.Logger, - Meta: meta, + index: idx, + batchSize: options.BatchSize, + log: options.Logger, + Meta: meta, }, exists, nil @@ -337,7 +343,7 @@ continue } - if k++; k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err = i.Flush(batch) if err != nil { errs <- err @@ -405,7 +411,7 @@ batch := i.index.NewBatch() var k int for _, hit := range results.Hits { batch.Delete(hit.ID) - if k++; k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err := i.Flush(batch) if err != nil { return err
M internal/index/search_test.go → internal/index/search_test.go
@@ -22,6 +22,7 @@ cfg := config.DefaultConfig read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{ Logger: log.Named("index"), + BatchSize: cfg.Importer.BatchSize, LowMemory: false, }) defer read.Close()