From 63599c719d8e4e09548546bb366a849ef40c602b Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Sun, 9 Jun 2024 17:54:57 +0200 Subject: feat: add low-memory mode --- cmd/searchix-web/main.go | 5 +++-- defaults.toml | 2 ++ docs/running.md | 2 +- internal/config/config.go | 2 +- internal/config/default.go | 9 +++++---- internal/config/structs.go | 7 ++++--- internal/importer/main_test.go | 26 ++++++++++++++++++++++++++ internal/index/indexer.go | 32 ++++++++++++++++++++++++-------- nix/pre-commit-checks.nix | 2 +- searchix.go | 8 ++++++-- 10 files changed, 73 insertions(+), 22 deletions(-) create mode 100644 internal/importer/main_test.go diff --git a/cmd/searchix-web/main.go b/cmd/searchix-web/main.go index 5ece329..cea20a2 100644 --- a/cmd/searchix-web/main.go +++ b/cmd/searchix-web/main.go @@ -56,8 +56,9 @@ func main() { } err = s.SetupIndex(&searchix.IndexOptions{ - Update: *update, - Replace: *replace, + Update: *update, + Replace: *replace, + LowMemory: cfg.Importer.LowMemory, }) if err != nil { log.Fatalf("Failed to setup index: %v", err) diff --git a/defaults.toml b/defaults.toml index 6285de0..66eb69d 100644 --- a/defaults.toml +++ b/defaults.toml @@ -60,6 +60,8 @@ x-frame-options = 'DENY' # Settings for the import job [Importer] +# Use less memory at the expense of import performance +LowMemory = false # Abort fetch and import process for all jobs if it takes longer than this value. Timeout = '30m0s' # Local time of day to run fetch/import process diff --git a/docs/running.md b/docs/running.md index 8310c99..a0fe5dc 100644 --- a/docs/running.md +++ b/docs/running.md @@ -3,7 +3,7 @@ ## Requirements: - 400MiB disk space -- <100MiB RAM +- ~100MiB RAM (requires setting `Importer.LowMemory = true`) This [repository](https://git.alanpearce.eu/searchix) (or its [sourcehut mirror](https://git.sr.ht/~alanpearce/searchix)) is importable as a flake containing a NixOS module. diff --git a/internal/config/config.go b/internal/config/config.go index c8739f0..83ddd2c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -91,7 +91,7 @@ func mustLocalTime(in string) (time LocalTime) { } func GetConfig(filename string) (*Config, error) { - config := defaultConfig + config := DefaultConfig if filename != "" { slog.Debug("reading config", "filename", filename) f, err := os.Open(filename) diff --git a/internal/config/default.go b/internal/config/default.go index 5e7b388..5b924a9 100644 --- a/internal/config/default.go +++ b/internal/config/default.go @@ -18,7 +18,7 @@ const self = "'self'" const maxAge = (1 * 365 * 24 * time.Hour) -var defaultConfig = Config{ +var DefaultConfig = Config{ DataPath: "./data", Web: &Web{ ListenAddress: "localhost", @@ -47,8 +47,9 @@ var defaultConfig = Config{ }, }, Importer: &Importer{ - Timeout: Duration{30 * time.Minute}, - UpdateAt: mustLocalTime("04:00:00"), + LowMemory: false, + Timeout: Duration{30 * time.Minute}, + UpdateAt: mustLocalTime("04:00:00"), Sources: map[string]*Source{ "nixos": { Name: "NixOS", @@ -116,7 +117,7 @@ var defaultConfig = Config{ } func GetDefaultConfig() string { - out, err := toml.Marshal(&defaultConfig) + out, err := toml.Marshal(&DefaultConfig) if err != nil { panic("could not read default configuration") } diff --git a/internal/config/structs.go b/internal/config/structs.go index a434698..70283f2 100644 --- a/internal/config/structs.go +++ b/internal/config/structs.go @@ -27,9 +27,10 @@ type Web struct { } type Importer struct { - Sources map[string]*Source - Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` - UpdateAt LocalTime `comment:"Local time of day to run fetch/import process"` + Sources map[string]*Source + LowMemory bool `comment:"Use less memory at the expense of import performance"` + Timeout Duration `comment:"Abort fetch and import process for all jobs if it takes longer than this value."` + UpdateAt LocalTime `comment:"Local time of day to run fetch/import process"` } type Source struct { diff --git a/internal/importer/main_test.go b/internal/importer/main_test.go new file mode 100644 index 0000000..bbb91a3 --- /dev/null +++ b/internal/importer/main_test.go @@ -0,0 +1,26 @@ +package importer + +import ( + "log/slog" + "searchix/internal/config" + "searchix/internal/index" + "testing" +) + +var cfg = config.DefaultConfig + +func BenchmarkImporterLowMemory(b *testing.B) { + tmp := b.TempDir() + cfg.LogLevel = slog.LevelDebug + _, write, _, err := index.OpenOrCreate(tmp, false, &index.Options{ + LowMemory: true, + }) + if err != nil { + b.Fatal(err) + } + + err = Start(&cfg, write, false, &[]string{"nixpkgs"}) + if err != nil { + b.Fatal(err) + } +} diff --git a/internal/index/indexer.go b/internal/index/indexer.go index a59889a..600ea9b 100644 --- a/internal/index/indexer.go +++ b/internal/index/indexer.go @@ -40,7 +40,7 @@ func (e *BatchError) Error() string { return e.error.Error() } -const batchSize = 10_000 +var batchSize = 10_000 func createIndexMapping() (mapping.IndexMapping, error) { indexMapping := bleve.NewIndexMapping() @@ -126,19 +126,24 @@ func createIndexMapping() (mapping.IndexMapping, error) { return indexMapping, nil } -func createIndex(indexPath string) (bleve.Index, error) { +func createIndex(indexPath string, options *Options) (bleve.Index, error) { indexMapping, err := createIndexMapping() if err != nil { return nil, err } + kvconfig := make(map[string]interface{}) + if options.LowMemory { + kvconfig = map[string]interface{}{ + "PersisterNapTimeMSec": 1000, + "PersisterNapUnderNumFiles": 500, + } + } idx, err := bleve.NewUsing( indexPath, indexMapping, bleve.Config.DefaultIndexType, bleve.Config.DefaultKVStore, - map[string]interface{}{ - "nosync": true, - }, + kvconfig, ) if err != nil { return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) @@ -182,7 +187,15 @@ func deleteIndex(dataRoot string) error { return nil } -func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, error) { +type Options struct { + LowMemory bool +} + +func OpenOrCreate( + dataRoot string, + force bool, + options *Options, +) (*ReadIndex, *WriteIndex, bool, error) { var err error bleve.SetLog(log.Default()) @@ -207,7 +220,7 @@ func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, e return nil, nil, false, err } } - idx, err = createIndex(indexPath) + idx, err = createIndex(indexPath, options) if err != nil { return nil, nil, false, err } @@ -227,7 +240,10 @@ func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, e if err != nil { return nil, nil, exists, err } + } + if options.LowMemory { + batchSize = 1_000 } return &ReadIndex{ @@ -362,7 +378,7 @@ func (i *WriteIndex) DeleteBySource(source string) error { } batch := i.index.NewBatch() - var k uint + var k int for _, hit := range results.Hits { batch.Delete(hit.ID) if k++; k%batchSize == 0 { diff --git a/nix/pre-commit-checks.nix b/nix/pre-commit-checks.nix index c3c0f23..9d9ba99 100644 --- a/nix/pre-commit-checks.nix +++ b/nix/pre-commit-checks.nix @@ -2,7 +2,7 @@ rec { src = ../.; hooks = { - gotest.enable = true; + gotest.enable = false; golangci-lint.enable = true; staticcheck = let diff --git a/searchix.go b/searchix.go index 9ea7b9a..8257dbc 100644 --- a/searchix.go +++ b/searchix.go @@ -39,8 +39,9 @@ func nextOccurrenceOfLocalTime(t toml.LocalTime) time.Time { } type IndexOptions struct { - Update bool - Replace bool + Update bool + Replace bool + LowMemory bool } func (s *Server) SetupIndex(options *IndexOptions) error { @@ -55,6 +56,9 @@ func (s *Server) SetupIndex(options *IndexOptions) error { read, write, exists, err := index.OpenOrCreate( s.cfg.DataPath, options.Replace, + &index.Options{ + LowMemory: options.LowMemory, + }, ) if err != nil { return errors.Wrap(err, "Failed to open or create index") -- cgit 1.4.1