diff options
author | Alan Pearce | 2025-03-27 17:03:10 +0100 |
---|---|---|
committer | Alan Pearce | 2025-03-27 17:03:10 +0100 |
commit | f38ccb5ec8149072c93a6c3173da06ba1d724c4c (patch) | |
tree | 9b7d2a560e2dcbcf6fd2599a7ef8f6bd29980b72 | |
parent | 75c2afdf214b0ddc57efe8e621eb172506c0b0f7 (diff) | |
download | searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.tar.lz searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.tar.zst searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.zip |
feat: make index batch size configurable
-rw-r--r-- | cmd/searchix-web/main.go | 1 |
-rw-r--r-- | defaults.toml | 2 |
-rw-r--r-- | internal/config/default.go | 1 |
-rw-r--r-- | internal/config/structs.go | 1 |
-rw-r--r-- | internal/importer/main_test.go | 1 |
-rw-r--r-- | internal/index/indexer.go | 30 |
-rw-r--r-- | internal/index/search_test.go | 1 |
7 files changed, 25 insertions, 12 deletions
diff --git a/cmd/searchix-web/main.go b/cmd/searchix-web/main.go index 20d4a94..061e08d 100644 --- a/cmd/searchix-web/main.go +++ b/cmd/searchix-web/main.go @@ -77,6 +77,7 @@ func main() { *replace, &index.Options{ LowMemory: cfg.Importer.LowMemory, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }, ) diff --git a/defaults.toml b/defaults.toml index f75aae6..2ceb0eb 100644 --- a/defaults.toml +++ b/defaults.toml @@ -64,6 +64,8 @@ x-frame-options = 'DENY' [Importer] # Use less memory at the expense of import performance LowMemory = false +# Number of items to process in each batch (affects memory usage). +BatchSize = 10000 # Abort fetch and import process for all jobs if it takes longer than this value. Timeout = '30m0s' # Time of day (UTC) to run fetch/import process diff --git a/internal/config/default.go b/internal/config/default.go index 5260fe9..0ec0735 100644 --- a/internal/config/default.go +++ b/internal/config/default.go @@ -49,6 +49,7 @@ var DefaultConfig = Config{ }, Importer: &Importer{ LowMemory: false, + BatchSize: 10_000, Timeout: Duration{30 * time.Minute}, UpdateAt: mustLocalTime("03:00:00"), Sources: map[string]*Source{ diff --git a/internal/config/structs.go b/internal/config/structs.go index 30ad975..52d0f0e 100644 --- a/internal/config/structs.go +++ b/internal/config/structs.go @@ -31,6 +31,7 @@ type Web struct { type Importer struct { Sources map[string]*Source LowMemory bool `comment:"Use less memory at the expense of import performance"` + BatchSize int `comment:"Number of items to process in each batch (affects memory usage)."` Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` UpdateAt LocalTime `comment:"Time of day (UTC) to run fetch/import process"` } diff --git a/internal/importer/main_test.go b/internal/importer/main_test.go index 84f6adf..eb155e0 100644 --- a/internal/importer/main_test.go +++ b/internal/importer/main_test.go @@ -16,6 +16,7 @@ func 
BenchmarkImporterLowMemory(b *testing.B) { logger := log.Configure(false) _, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{ LowMemory: true, + BatchSize: cfg.Importer.BatchSize, Logger: logger.Named("index"), }) if err != nil { diff --git a/internal/index/indexer.go b/internal/index/indexer.go index 7591aef..454a736 100644 --- a/internal/index/indexer.go +++ b/internal/index/indexer.go @@ -11,6 +11,7 @@ import ( "path/filepath" "slices" + "go.alanpearce.eu/searchix/internal/config" "go.alanpearce.eu/searchix/internal/file" "go.alanpearce.eu/searchix/internal/nix" "go.alanpearce.eu/x/log" @@ -34,13 +35,15 @@ import ( type Options struct { LowMemory bool + BatchSize int Logger *log.Logger } type WriteIndex struct { - index bleve.Index - log *log.Logger - Meta *Meta + batchSize int + index bleve.Index + log *log.Logger + Meta *Meta } type BatchError struct { @@ -51,8 +54,6 @@ func (e *BatchError) Error() string { return e.E.Error() } -var batchSize = 10_000 - func createIndexMapping() (mapping.IndexMapping, errors.E) { indexMapping := bleve.NewIndexMapping() indexMapping.StoreDynamic = false @@ -268,8 +269,12 @@ func OpenOrCreate( } } - if options.LowMemory { - batchSize = 1_000 + if options.BatchSize == 0 { + options.BatchSize = config.DefaultConfig.Importer.BatchSize + } + + if options.LowMemory && options.BatchSize == config.DefaultConfig.Importer.BatchSize { + options.BatchSize = 1_000 } return &ReadIndex{ @@ -278,9 +283,10 @@ func OpenOrCreate( meta: meta, }, &WriteIndex{ - index: idx, - log: options.Logger, - Meta: meta, + index: idx, + batchSize: options.BatchSize, + log: options.Logger, + Meta: meta, }, exists, nil @@ -337,7 +343,7 @@ func (i *WriteIndex) Import( continue } - if k++; k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err = i.Flush(batch) if err != nil { errs <- err @@ -405,7 +411,7 @@ func (i *WriteIndex) DeleteBySource(source string) errors.E { var k int for _, hit := range results.Hits { batch.Delete(hit.ID) - if k++; 
k%batchSize == 0 { + if k++; k%i.batchSize == 0 { err := i.Flush(batch) if err != nil { return err diff --git a/internal/index/search_test.go b/internal/index/search_test.go index 339a0de..126c0a6 100644 --- a/internal/index/search_test.go +++ b/internal/index/search_test.go @@ -22,6 +22,7 @@ func TestSearchGitPackagesFirst(t *testing.T) { read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{ Logger: log.Named("index"), + BatchSize: cfg.Importer.BatchSize, LowMemory: false, }) defer read.Close() |