about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2025-03-27 17:03:10 +0100
committer Alan Pearce 2025-03-27 17:03:10 +0100
commit f38ccb5ec8149072c93a6c3173da06ba1d724c4c (patch)
tree 9b7d2a560e2dcbcf6fd2599a7ef8f6bd29980b72
parent 75c2afdf214b0ddc57efe8e621eb172506c0b0f7 (diff)
download searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.tar.lz
searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.tar.zst
searchix-f38ccb5ec8149072c93a6c3173da06ba1d724c4c.zip
feat: make index batch size configurable
-rw-r--r-- cmd/searchix-web/main.go 1
-rw-r--r-- defaults.toml 2
-rw-r--r-- internal/config/default.go 1
-rw-r--r-- internal/config/structs.go 1
-rw-r--r-- internal/importer/main_test.go 1
-rw-r--r-- internal/index/indexer.go 30
-rw-r--r-- internal/index/search_test.go 1
7 files changed, 25 insertions, 12 deletions
diff --git a/cmd/searchix-web/main.go b/cmd/searchix-web/main.go
index 20d4a94..061e08d 100644
--- a/cmd/searchix-web/main.go
+++ b/cmd/searchix-web/main.go
@@ -77,6 +77,7 @@ func main() {
 		*replace,
 		&index.Options{
 			LowMemory: cfg.Importer.LowMemory,
+			BatchSize: cfg.Importer.BatchSize,
 			Logger:    logger.Named("index"),
 		},
 	)
diff --git a/defaults.toml b/defaults.toml
index f75aae6..2ceb0eb 100644
--- a/defaults.toml
+++ b/defaults.toml
@@ -64,6 +64,8 @@ x-frame-options = 'DENY'
 [Importer]
 # Use less memory at the expense of import performance
 LowMemory = false
+# Number of items to process in each batch (affects memory usage).
+BatchSize = 10000
 # Abort fetch and import process for all jobs if it takes longer than this value.
 Timeout = '30m0s'
 # Time of day (UTC) to run fetch/import process
diff --git a/internal/config/default.go b/internal/config/default.go
index 5260fe9..0ec0735 100644
--- a/internal/config/default.go
+++ b/internal/config/default.go
@@ -49,6 +49,7 @@ var DefaultConfig = Config{
 	},
 	Importer: &Importer{
 		LowMemory: false,
+		BatchSize: 10_000,
 		Timeout:   Duration{30 * time.Minute},
 		UpdateAt:  mustLocalTime("03:00:00"),
 		Sources: map[string]*Source{
diff --git a/internal/config/structs.go b/internal/config/structs.go
index 30ad975..52d0f0e 100644
--- a/internal/config/structs.go
+++ b/internal/config/structs.go
@@ -31,6 +31,7 @@ type Web struct {
 type Importer struct {
 	Sources   map[string]*Source
 	LowMemory bool      `comment:"Use less memory at the expense of import performance"`
+	BatchSize int       `comment:"Number of items to process in each batch (affects memory usage)."`
 	Timeout   Duration  `comment:"Abort fetch and import process for all jobs if it takes longer than this value."`
 	UpdateAt  LocalTime `comment:"Time of day (UTC) to run fetch/import process"`
 }
diff --git a/internal/importer/main_test.go b/internal/importer/main_test.go
index 84f6adf..eb155e0 100644
--- a/internal/importer/main_test.go
+++ b/internal/importer/main_test.go
@@ -16,6 +16,7 @@ func BenchmarkImporterLowMemory(b *testing.B) {
 	logger := log.Configure(false)
 	_, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{
 		LowMemory: true,
+		BatchSize: cfg.Importer.BatchSize,
 		Logger:    logger.Named("index"),
 	})
 	if err != nil {
diff --git a/internal/index/indexer.go b/internal/index/indexer.go
index 7591aef..454a736 100644
--- a/internal/index/indexer.go
+++ b/internal/index/indexer.go
@@ -11,6 +11,7 @@ import (
 	"path/filepath"
 	"slices"
 
+	"go.alanpearce.eu/searchix/internal/config"
 	"go.alanpearce.eu/searchix/internal/file"
 	"go.alanpearce.eu/searchix/internal/nix"
 	"go.alanpearce.eu/x/log"
@@ -34,13 +35,15 @@ import (
 
 type Options struct {
 	LowMemory bool
+	BatchSize int
 	Logger    *log.Logger
 }
 
 type WriteIndex struct {
-	index bleve.Index
-	log   *log.Logger
-	Meta  *Meta
+	batchSize int
+	index     bleve.Index
+	log       *log.Logger
+	Meta      *Meta
 }
 
 type BatchError struct {
@@ -51,8 +54,6 @@ func (e *BatchError) Error() string {
 	return e.E.Error()
 }
 
-var batchSize = 10_000
-
 func createIndexMapping() (mapping.IndexMapping, errors.E) {
 	indexMapping := bleve.NewIndexMapping()
 	indexMapping.StoreDynamic = false
@@ -268,8 +269,12 @@ func OpenOrCreate(
 		}
 	}
 
-	if options.LowMemory {
-		batchSize = 1_000
+	if options.BatchSize == 0 {
+		options.BatchSize = config.DefaultConfig.Importer.BatchSize
+	}
+
+	if options.LowMemory && options.BatchSize == config.DefaultConfig.Importer.BatchSize {
+		options.BatchSize = 1_000
 	}
 
 	return &ReadIndex{
@@ -278,9 +283,10 @@ func OpenOrCreate(
 			meta:  meta,
 		},
 		&WriteIndex{
-			index: idx,
-			log:   options.Logger,
-			Meta:  meta,
+			index:     idx,
+			batchSize: options.BatchSize,
+			log:       options.Logger,
+			Meta:      meta,
 		},
 		exists,
 		nil
@@ -337,7 +343,7 @@ func (i *WriteIndex) Import(
 				continue
 			}
 
-			if k++; k%batchSize == 0 {
+			if k++; k%i.batchSize == 0 {
 				err = i.Flush(batch)
 				if err != nil {
 					errs <- err
@@ -405,7 +411,7 @@ func (i *WriteIndex) DeleteBySource(source string) errors.E {
 	var k int
 	for _, hit := range results.Hits {
 		batch.Delete(hit.ID)
-		if k++; k%batchSize == 0 {
+		if k++; k%i.batchSize == 0 {
 			err := i.Flush(batch)
 			if err != nil {
 				return err
diff --git a/internal/index/search_test.go b/internal/index/search_test.go
index 339a0de..126c0a6 100644
--- a/internal/index/search_test.go
+++ b/internal/index/search_test.go
@@ -22,6 +22,7 @@ func TestSearchGitPackagesFirst(t *testing.T) {
 
 	read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{
 		Logger:    log.Named("index"),
+		BatchSize: cfg.Importer.BatchSize,
 		LowMemory: false,
 	})
 	defer read.Close()