all repos — searchix @ f38ccb5ec8149072c93a6c3173da06ba1d724c4c

Search engine for NixOS, nix-darwin, home-manager and NUR users

feat: make index batch size configurable

Alan Pearce
commit

f38ccb5ec8149072c93a6c3173da06ba1d724c4c

parent

75c2afdf214b0ddc57efe8e621eb172506c0b0f7

M cmd/searchix-web/main.go
@@ -77,6 +77,7 @@ cfg.DataPath,
*replace, &index.Options{ LowMemory: cfg.Importer.LowMemory, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }, )
M defaults.toml
@@ -64,6 +64,8 @@ # Settings for the import job
[Importer] # Use less memory at the expense of import performance LowMemory = false +# Number of items to process in each batch (affects memory usage). +BatchSize = 10000 # Abort fetch and import process for all jobs if it takes longer than this value. Timeout = '30m0s' # Time of day (UTC) to run fetch/import process
M internal/config/default.go
@@ -49,6 +49,7 @@ LogRequests: true,
}, Importer: &Importer{ LowMemory: false, + BatchSize: 10_000, Timeout: Duration{30 * time.Minute}, UpdateAt: mustLocalTime("03:00:00"), Sources: map[string]*Source{
M internal/config/structs.go
@@ -31,6 +31,7 @@
type Importer struct { Sources map[string]*Source LowMemory bool `comment:"Use less memory at the expense of import performance"` + BatchSize int `comment:"Number of items to process in each batch (affects memory usage)."` Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` UpdateAt LocalTime `comment:"Time of day (UTC) to run fetch/import process"` }
M internal/importer/main_test.go
@@ -16,6 +16,7 @@ tmp := b.TempDir()
logger := log.Configure(false) _, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{ LowMemory: true, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }) if err != nil {
M internal/index/indexer.go
@@ -11,6 +11,7 @@ "path"
"path/filepath" "slices" + "go.alanpearce.eu/searchix/internal/config" "go.alanpearce.eu/searchix/internal/file" "go.alanpearce.eu/searchix/internal/nix" "go.alanpearce.eu/x/log"
@@ -34,13 +35,15 @@ )
type Options struct { LowMemory bool + BatchSize int Logger *log.Logger } type WriteIndex struct { - index bleve.Index - log *log.Logger - Meta *Meta + batchSize int + index bleve.Index + log *log.Logger + Meta *Meta } type BatchError struct {
@@ -50,8 +53,6 @@
func (e *BatchError) Error() string { return e.E.Error() } - -var batchSize = 10_000 func createIndexMapping() (mapping.IndexMapping, errors.E) { indexMapping := bleve.NewIndexMapping()
@@ -268,8 +269,12 @@ return nil, nil, exists, err
} } - if options.LowMemory { - batchSize = 1_000 + if options.BatchSize == 0 { + options.BatchSize = config.DefaultConfig.Importer.BatchSize + } + + if options.LowMemory && options.BatchSize == config.DefaultConfig.Importer.BatchSize { + options.BatchSize = 1_000 } return &ReadIndex{
@@ -278,9 +283,10 @@ log: options.Logger,
meta: meta, }, &WriteIndex{ - index: idx, - log: options.Logger, - Meta: meta, + index: idx, + batchSize: options.BatchSize, + log: options.Logger, + Meta: meta, }, exists, nil
@@ -337,7 +343,7 @@
continue } - if k++; k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err = i.Flush(batch) if err != nil { errs <- err
@@ -405,7 +411,7 @@ batch := i.index.NewBatch()
var k int for _, hit := range results.Hits { batch.Delete(hit.ID) - if k++; k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err := i.Flush(batch) if err != nil { return err
M internal/index/search_test.go
@@ -22,6 +22,7 @@ cfg := config.DefaultConfig
read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{ Logger: log.Named("index"), + BatchSize: cfg.Importer.BatchSize, LowMemory: false, }) defer read.Close()