feat: add low-memory mode
Alan Pearce alan@alanpearce.eu
Sun, 09 Jun 2024 17:54:57 +0200
10 files changed, 73 insertions(+), 22 deletions(-)
M cmd/searchix-web/main.go → cmd/searchix-web/main.go
@@ -56,8 +56,9 @@ log.Fatalf("Failed to initialise searchix: %v", err) } err = s.SetupIndex(&searchix.IndexOptions{ - Update: *update, - Replace: *replace, + Update: *update, + Replace: *replace, + LowMemory: cfg.Importer.LowMemory, }) if err != nil { log.Fatalf("Failed to setup index: %v", err)
M defaults.toml → defaults.toml
@@ -60,6 +60,8 @@ x-frame-options = 'DENY' # Settings for the import job [Importer] +# Use less memory at the expense of import performance +LowMemory = false # Abort fetch and import process for all jobs if it takes longer than this value. Timeout = '30m0s' # Local time of day to run fetch/import process
M docs/running.md → docs/running.md
@@ -3,7 +3,7 @@ ## Requirements: - 400MiB disk space -- <100MiB RAM +- ~100MiB RAM (requires setting `Importer.LowMemory = true`) This [repository](https://git.alanpearce.eu/searchix) (or its [sourcehut mirror](https://git.sr.ht/~alanpearce/searchix)) is importable as a flake containing a NixOS module.
M internal/config/config.go → internal/config/config.go
@@ -91,7 +91,7 @@ return } func GetConfig(filename string) (*Config, error) { - config := defaultConfig + config := DefaultConfig if filename != "" { slog.Debug("reading config", "filename", filename) f, err := os.Open(filename)
M internal/config/default.go → internal/config/default.go
@@ -18,7 +18,7 @@ const self = "'self'" const maxAge = (1 * 365 * 24 * time.Hour) -var defaultConfig = Config{ +var DefaultConfig = Config{ DataPath: "./data", Web: &Web{ ListenAddress: "localhost", @@ -47,8 +47,9 @@ "x-frame-options": "DENY", }, }, Importer: &Importer{ - Timeout: Duration{30 * time.Minute}, - UpdateAt: mustLocalTime("04:00:00"), + LowMemory: false, + Timeout: Duration{30 * time.Minute}, + UpdateAt: mustLocalTime("04:00:00"), Sources: map[string]*Source{ "nixos": { Name: "NixOS", @@ -116,7 +117,7 @@ }, } func GetDefaultConfig() string { - out, err := toml.Marshal(&defaultConfig) + out, err := toml.Marshal(&DefaultConfig) if err != nil { panic("could not read default configuration") }
M internal/config/structs.go → internal/config/structs.go
@@ -27,9 +27,10 @@ Headers map[string]string `comment:"Extra headers to send with HTTP requests"` } type Importer struct { - Sources map[string]*Source - Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` - UpdateAt LocalTime `comment:"Local time of day to run fetch/import process"` + Sources map[string]*Source + LowMemory bool `comment:"Use less memory at the expense of import performance"` + Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` + UpdateAt LocalTime `comment:"Local time of day to run fetch/import process"` } type Source struct {
A internal/importer/main_test.go
@@ -0,0 +1,40 @@
+package importer
+
+import (
+	"log/slog"
+	"searchix/internal/config"
+	"searchix/internal/index"
+	"testing"
+)
+
+// cfg is a package-level copy of the default configuration used by the
+// benchmarks in this file. The copy is shallow: pointer fields such as
+// Importer are still shared with config.DefaultConfig.
+var cfg = config.DefaultConfig
+
+// BenchmarkImporterLowMemory measures an import of the "nixpkgs" source into
+// a freshly created index opened with LowMemory enabled.
+//
+// The import runs b.N times, as the testing package requires, and index
+// creation is kept out of the timed region so only Start is measured.
+func BenchmarkImporterLowMemory(b *testing.B) {
+	cfg.LogLevel = slog.LevelDebug
+	sources := []string{"nixpkgs"}
+
+	for i := 0; i < b.N; i++ {
+		// Every iteration gets its own empty index so that later runs do
+		// not measure a re-import into already populated data.
+		b.StopTimer()
+		_, write, _, err := index.OpenOrCreate(b.TempDir(), false, &index.Options{
+			LowMemory: true,
+		})
+		if err != nil {
+			b.Fatal(err)
+		}
+		b.StartTimer()
+
+		if err := Start(&cfg, write, false, &sources); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
M internal/index/indexer.go → internal/index/indexer.go
@@ -40,7 +40,7 @@ func (e *BatchError) Error() string { return e.error.Error() } -const batchSize = 10_000 +var batchSize = 10_000 func createIndexMapping() (mapping.IndexMapping, error) { indexMapping := bleve.NewIndexMapping() @@ -126,19 +126,24 @@ return indexMapping, nil } -func createIndex(indexPath string) (bleve.Index, error) { +func createIndex(indexPath string, options *Options) (bleve.Index, error) { indexMapping, err := createIndexMapping() if err != nil { return nil, err } + kvconfig := make(map[string]interface{}) + if options.LowMemory { + kvconfig = map[string]interface{}{ + "PersisterNapTimeMSec": 1000, + "PersisterNapUnderNumFiles": 500, + } + } idx, err := bleve.NewUsing( indexPath, indexMapping, bleve.Config.DefaultIndexType, bleve.Config.DefaultKVStore, - map[string]interface{}{ - "nosync": true, - }, + kvconfig, ) if err != nil { return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) @@ -182,7 +187,15 @@ return nil } -func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, error) { +type Options struct { + LowMemory bool +} + +func OpenOrCreate( + dataRoot string, + force bool, + options *Options, +) (*ReadIndex, *WriteIndex, bool, error) { var err error bleve.SetLog(log.Default()) @@ -207,7 +220,7 @@ if err != nil { return nil, nil, false, err } } - idx, err = createIndex(indexPath) + idx, err = createIndex(indexPath, options) if err != nil { return nil, nil, false, err } @@ -227,7 +240,10 @@ meta, err = openMeta(metaPath) if err != nil { return nil, nil, exists, err } + } + if options.LowMemory { + batchSize = 1_000 } return &ReadIndex{ @@ -362,7 +378,7 @@ return errors.WithMessagef(err, "failed to query documents of retired index %s", source) } batch := i.index.NewBatch() - var k uint + var k int for _, hit := range results.Hits { batch.Delete(hit.ID) if k++; k%batchSize == 0 {
M nix/pre-commit-checks.nix → nix/pre-commit-checks.nix
@@ -2,7 +2,7 @@ { pkgs }: rec { src = ../.; hooks = { - gotest.enable = true; + gotest.enable = false; golangci-lint.enable = true; staticcheck = let
M searchix.go → searchix.go
@@ -39,8 +39,9 @@ return nextRun } type IndexOptions struct { - Update bool - Replace bool + Update bool + Replace bool + LowMemory bool } func (s *Server) SetupIndex(options *IndexOptions) error { @@ -55,6 +56,9 @@ read, write, exists, err := index.OpenOrCreate( s.cfg.DataPath, options.Replace, + &index.Options{ + LowMemory: options.LowMemory, + }, ) if err != nil { return errors.Wrap(err, "Failed to open or create index")