From 6b40e0c0fa90f11be14a93f1d6275779fd645cac Mon Sep 17 00:00:00 2001 From: Alan Pearce Date: Mon, 13 May 2024 21:50:14 +0200 Subject: refactor: combine import and web server into one binary --- import/main.go | 29 ---- internal/config/config.go | 30 +++- internal/importer/channel.go | 4 +- internal/importer/importer.go | 6 +- internal/importer/main.go | 16 +- internal/importer/nixpkgs-channel.go | 4 +- internal/index/index_meta.go | 73 +++++++++ internal/index/indexer.go | 298 +++++++++++++++++++++++++++++++++++ internal/index/search.go | 102 ++++++++++++ internal/search/index_meta.go | 73 --------- internal/search/indexer.go | 292 ---------------------------------- internal/search/search.go | 123 --------------- internal/server/mux.go | 17 +- internal/server/server.go | 5 +- justfile | 7 +- nix/modules/default.nix | 88 ++++------- nix/package.nix | 3 +- searchix.go | 116 ++++++++++++++ serve/main.go | 60 ------- 19 files changed, 673 insertions(+), 673 deletions(-) delete mode 100644 import/main.go create mode 100644 internal/index/index_meta.go create mode 100644 internal/index/indexer.go create mode 100644 internal/index/search.go delete mode 100644 internal/search/index_meta.go delete mode 100644 internal/search/indexer.go delete mode 100644 internal/search/search.go create mode 100644 searchix.go delete mode 100644 serve/main.go diff --git a/import/main.go b/import/main.go deleted file mode 100644 index 76ebdcf..0000000 --- a/import/main.go +++ /dev/null @@ -1,29 +0,0 @@ -package main - -import ( - "flag" - "log" - "log/slog" - "searchix/internal/config" - "searchix/internal/importer" -) - -var ( - replace = flag.Bool("replace", false, "whether to replace existing database, if it exists") - configFile = flag.String("config", "config.toml", "config file to use") -) - -func main() { - flag.Parse() - - cfg, err := config.GetConfig(*configFile) - if err != nil { - log.Fatal(err) - } - slog.SetLogLoggerLevel(cfg.LogLevel) - - err = importer.Start(cfg, *replace) - if err != nil { - log.Fatal(err) - } -} diff --git a/internal/config/config.go b/internal/config/config.go index c3a5a90..ec04a2c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -26,6 +26,19 @@ func (u *URL) UnmarshalText(text []byte) (err error) { return nil } +type Duration struct { + time.Duration +} + +func (d *Duration) UnmarshalText(text []byte) (err error) { + d.Duration, err = time.ParseDuration(string(text)) + if err != nil { + return errors.WithMessagef(err, "could not parse duration %s", string(text)) + } + + return nil +} + func mustURL(in string) (u URL) { var err error u.URL, err = url.Parse(in) @@ -36,6 +49,15 @@ func mustURL(in string) (u URL) { return u } +func mustLocalTime(in string) (time toml.LocalTime) { + err := time.UnmarshalText([]byte(in)) + if err != nil { + panic(errors.Errorf("Could not parse time: %s", in)) + } + + return +} + type Web struct { ContentSecurityPolicy CSP ListenAddress string @@ -48,8 +70,9 @@ type Web struct { } type Importer struct { - Sources map[string]*Source - Timeout time.Duration + Sources map[string]*Source + Timeout Duration + UpdateAt toml.LocalTime } type Config struct { @@ -73,7 +96,8 @@ var defaultConfig = Config{ }, }, Importer: &Importer{ - Timeout: 30 * time.Minute, + Timeout: Duration{30 * time.Minute}, + UpdateAt: mustLocalTime("04:00:00"), Sources: map[string]*Source{ "nixos": { Name: "NixOS", diff --git a/internal/importer/channel.go b/internal/importer/channel.go index fb6668c..1bce1b0 100644 --- a/internal/importer/channel.go +++ b/internal/importer/channel.go @@ -9,7 +9,7 @@ import ( "path" "searchix/internal/config" "searchix/internal/file" - "searchix/internal/search" + "searchix/internal/index" "strconv" "strings" @@ -81,7 +81,7 @@ func (i *ChannelImporter) FetchIfNeeded(parent context.Context) (bool, error) { return before != after, nil } -func (i *ChannelImporter) Import(parent context.Context, indexer *search.WriteIndex) (bool, error) { +func (i *ChannelImporter) Import(parent context.Context, indexer *index.WriteIndex) (bool, error) { if i.Source.OutputPath == "" { return false, errors.New("no output path specified") } diff --git a/internal/importer/importer.go b/internal/importer/importer.go index 5f251b0..a242481 100644 --- a/internal/importer/importer.go +++ b/internal/importer/importer.go @@ -5,13 +5,13 @@ import ( "log/slog" "path" "searchix/internal/config" - "searchix/internal/search" + "searchix/internal/index" "sync" ) type Importer interface { FetchIfNeeded(context.Context) (bool, error) - Import(context.Context, *search.WriteIndex) (bool, error) + Import(context.Context, *index.WriteIndex) (bool, error) } func NewNixpkgsChannelImporter( @@ -50,7 +50,7 @@ type importConfig struct { func processOptions( parent context.Context, - indexer *search.WriteIndex, + indexer *index.WriteIndex, conf *importConfig, ) (bool, error) { ctx, cancel := context.WithTimeout(parent, conf.Source.ImportTimeout) diff --git a/internal/importer/main.go b/internal/importer/main.go index a6f15e9..2d87e33 100644 --- a/internal/importer/main.go +++ b/internal/importer/main.go @@ -8,23 +8,18 @@ import ( "os/exec" "path" "searchix/internal/config" - "searchix/internal/search" + "searchix/internal/index" "strings" ) -func Start(cfg *config.Config, replace bool) error { +func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error { if len(cfg.Importer.Sources) == 0 { slog.Info("No sources enabled") return nil } - indexer, err := search.NewIndexer(cfg.DataPath, replace) - if err != nil { - log.Fatalf("Failed to create indexer: %v", err) - } - - ctx, cancel := context.WithTimeout(context.Background(), cfg.Importer.Timeout) + ctx, cancel := context.WithTimeout(context.Background(), cfg.Importer.Timeout.Duration) defer cancel() var imp Importer @@ -81,10 +76,5 @@ func Start(cfg *config.Config, replace bool) error { } } - err = indexer.Close() - if err != nil { - slog.Error("error closing indexer", "error", err) - } - return nil } diff --git a/internal/importer/nixpkgs-channel.go b/internal/importer/nixpkgs-channel.go index 7aaa816..d302154 100644 --- a/internal/importer/nixpkgs-channel.go +++ b/internal/importer/nixpkgs-channel.go @@ -9,7 +9,7 @@ import ( "path" "searchix/internal/config" "searchix/internal/file" - "searchix/internal/search" + "searchix/internal/index" "github.com/pkg/errors" ) @@ -65,7 +65,7 @@ func (i *NixpkgsChannelImporter) FetchIfNeeded(parent context.Context) (bool, er func (i *NixpkgsChannelImporter) Import( parent context.Context, - indexer *search.WriteIndex, + indexer *index.WriteIndex, ) (bool, error) { filename := path.Join(i.DataPath, filesToFetch["options"]) revFilename := path.Join(i.DataPath, filesToFetch["revision"]) diff --git a/internal/index/index_meta.go b/internal/index/index_meta.go new file mode 100644 index 0000000..e24cd3b --- /dev/null +++ b/internal/index/index_meta.go @@ -0,0 +1,73 @@ +package index + +import ( + "encoding/json" + "log/slog" + "os" + "searchix/internal/file" + + "github.com/pkg/errors" +) + +const CurrentSchemaVersion = 1 + +type Meta struct { + path string + SchemaVersion int +} + +func createMeta(path string) (*Meta, error) { + exists, err := file.Exists(path) + if err != nil { + return nil, errors.WithMessage(err, "could not check for existence of index metadata") + } + if exists { + return nil, errors.New("index metadata already exists") + } + + return &Meta{ + path: path, + SchemaVersion: CurrentSchemaVersion, + }, nil +} + +func openMeta(path string) (*Meta, error) { + j, err := os.ReadFile(path) + if err != nil { + return nil, errors.WithMessage(err, "could not open index metadata file") + } + var meta Meta + err = json.Unmarshal(j, &meta) + if err != nil { + return nil, errors.WithMessage(err, "index metadata is corrupt, try replacing the index") + } + + meta.checkSchemaVersion() + + return &meta, nil +} + +func (i *Meta) checkSchemaVersion() { + if i.SchemaVersion < CurrentSchemaVersion { + slog.Warn( + "Index schema version out of date, suggest re-indexing", + "schema_version", + i.SchemaVersion, + "latest_version", + CurrentSchemaVersion, + ) + } +} + +func (i *Meta) Save() error { + j, err := json.Marshal(i) + if err != nil { + return errors.WithMessage(err, "could not prepare index metadata for saving") + } + err = os.WriteFile(i.path, j, 0o600) + if err != nil { + return errors.WithMessage(err, "could not save index metadata") + } + + return nil +} diff --git a/internal/index/indexer.go b/internal/index/indexer.go new file mode 100644 index 0000000..63cf1a6 --- /dev/null +++ b/internal/index/indexer.go @@ -0,0 +1,298 @@ +package index + +import ( + "bytes" + "context" + "encoding/gob" + "io/fs" + "log" + "log/slog" + "os" + "path" + "searchix/internal/file" + "searchix/internal/options" + "slices" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/analyzer/web" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" + "github.com/blevesearch/bleve/v2/document" + "github.com/blevesearch/bleve/v2/mapping" + indexAPI "github.com/blevesearch/bleve_index_api" + "github.com/pkg/errors" +) + +type WriteIndex struct { + index bleve.Index + meta *Meta +} + +func createIndexMapping() (mapping.IndexMapping, error) { + indexMapping := bleve.NewIndexMapping() + indexMapping.StoreDynamic = false + indexMapping.IndexDynamic = false + indexMapping.TypeField = "BleveType" + + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = false + + descriptionFieldMapping := bleve.NewTextFieldMapping() + descriptionFieldMapping.Store = false + descriptionFieldMapping.Analyzer = web.Name + + err := indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ + "type": custom.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": single.Name, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + + keywordFieldMapping := bleve.NewKeywordFieldMapping() + keywordFieldMapping.Analyzer = "keyword_single" + + nameMapping := bleve.NewTextFieldMapping() + nameMapping.Analyzer = "option_name" + nameMapping.IncludeTermVectors = true + nameMapping.Store = false + + nixValueMapping := bleve.NewDocumentStaticMapping() + nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) + nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) + + locFieldMapping := bleve.NewKeywordFieldMapping() + locFieldMapping.Analyzer = "loc" + locFieldMapping.IncludeTermVectors = true + locFieldMapping.Store = false + + optionMapping := bleve.NewDocumentStaticMapping() + + optionMapping.AddFieldMappingsAt("Name", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Loc", locFieldMapping) + optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) + optionMapping.AddFieldMappingsAt("Description", textFieldMapping) + + optionMapping.AddSubDocumentMapping("Default", nixValueMapping) + optionMapping.AddSubDocumentMapping("Example", nixValueMapping) + + indexMapping.AddDocumentMapping("option", optionMapping) + + return indexMapping, nil +} + +func createIndex(indexPath string) (bleve.Index, error) { + indexMapping, err := createIndexMapping() + if err != nil { + return nil, err + } + idx, err := bleve.NewUsing( + indexPath, + indexMapping, + bleve.Config.DefaultIndexType, + bleve.Config.DefaultKVStore, + map[string]interface{}{ + "nosync": true, + }, + ) + if err != nil { + return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) + } + + return idx, nil +} + +const ( + indexBaseName = "index.bleve" + metaBaseName = "meta.json" +) + +var expectedDataFiles = []string{ + metaBaseName, + indexBaseName, + "sources", +} + +func deleteIndex(dataRoot string) error { + dir, err := os.ReadDir(dataRoot) + if err != nil { + return errors.WithMessagef(err, "could not read data directory %s", dataRoot) + } + remainingFiles := slices.DeleteFunc(dir, func(e fs.DirEntry) bool { + return slices.Contains(expectedDataFiles, e.Name()) + }) + if len(remainingFiles) > 0 { + return errors.Errorf( + "cowardly refusing to remove data directory %s as it contains unknown files: %v", + dataRoot, + remainingFiles, + ) + } + + err = os.RemoveAll(dataRoot) + if err != nil { + return errors.WithMessagef(err, "could not remove data directory %s", dataRoot) + } + + return nil +} + +func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, error) { + var err error + bleve.SetLog(log.Default()) + + indexPath := path.Join(dataRoot, indexBaseName) + metaPath := path.Join(dataRoot, metaBaseName) + + exists, err := file.Exists(indexPath) + if err != nil { + return nil, nil, exists, errors.WithMessagef( + err, + "could not check if index exists at path %s", + indexPath, + ) + } + + var idx bleve.Index + var meta *Meta + if !exists || force { + if force { + err = deleteIndex(dataRoot) + if err != nil { + return nil, nil, exists, err + } + } + idx, err = createIndex(indexPath) + if err != nil { + return nil, nil, exists, err + } + + meta, err = createMeta(metaPath) + if err != nil { + return nil, nil, exists, err + } + + err = meta.Save() + if err != nil { + return nil, nil, exists, err + } + } else { + idx, err = bleve.Open(indexPath) + if err != nil { + return nil, nil, exists, errors.WithMessagef(err, "could not open index at path %s", indexPath) + } + + meta, err = openMeta(metaPath) + if err != nil { + return nil, nil, exists, err + } + + } + + return &ReadIndex{ + idx, + meta, + }, + &WriteIndex{ + idx, + meta, + }, + exists, + nil +} + +func (i *WriteIndex) ImportOptions( + ctx context.Context, + objects <-chan *options.NixOption, +) <-chan error { + var err error + errs := make(chan error) + + go func() { + defer close(errs) + batch := i.index.NewBatch() + indexMapping := i.index.Mapping() + + outer: + for opt := range objects { + select { + case <-ctx.Done(): + slog.Debug("context cancelled") + + break outer + default: + } + + doc := document.NewDocument(opt.Source + "/" + opt.Name) + err = indexMapping.MapDocument(doc, opt) + if err != nil { + errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Name) + + continue + } + + var data bytes.Buffer + enc := gob.NewEncoder(&data) + err = enc.Encode(opt) + if err != nil { + errs <- errors.WithMessage(err, "could not store option in search index") + + continue + } + field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), indexAPI.StoreField) + newDoc := doc.AddField(field) + + // slog.Debug("adding option to index", "name", opt.Name) + err = batch.IndexAdvanced(newDoc) + + if err != nil { + errs <- errors.WithMessagef(err, "could not index option %s", opt.Name) + + continue + } + } + + size := batch.Size() + slog.Debug("flushing batch", "size", size) + + err := i.index.Batch(batch) + if err != nil { + errs <- errors.WithMessagef(err, "could not flush batch") + } + }() + + return errs +} + +func (i *WriteIndex) Close() error { + err := i.index.Close() + if err != nil { + return errors.WithMessagef(err, "could not close index") + } + + return nil +} diff --git a/internal/index/search.go b/internal/index/search.go new file mode 100644 index 0000000..d069510 --- /dev/null +++ b/internal/index/search.go @@ -0,0 +1,102 @@ +package index + +import ( + "bytes" + "context" + "encoding/gob" + "searchix/internal/options" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/search" + "github.com/pkg/errors" +) + +const ResultsPerPage = 20 + +type DocumentMatch struct { + search.DocumentMatch + Data options.NixOption +} + +type Result struct { + *bleve.SearchResult + Hits []DocumentMatch +} + +type ReadIndex struct { + index bleve.Index + meta *Meta +} + +func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) { + query := bleve.NewTermQuery(name) + query.SetField("Source") + search := bleve.NewSearchRequest(query) + + result, err := index.index.SearchInContext(ctx, search) + + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + if err != nil { + return nil, errors.WithMessagef( + err, + "failed to execute search to find source %s in index", + name, + ) + } + } + + return result, nil +} + +func (index *ReadIndex) Search( + ctx context.Context, + source string, + keyword string, + from uint64, +) (*Result, error) { + sourceQuery := bleve.NewTermQuery(source) + userQuery := bleve.NewMatchQuery(keyword) + userQuery.Analyzer = "option_name" + + query := bleve.NewConjunctionQuery(sourceQuery, userQuery) + + search := bleve.NewSearchRequest(query) + search.Size = ResultsPerPage + search.Fields = []string{"_data"} + + if from != 0 { + search.From = int(from) + } + + bleveResult, err := index.index.SearchInContext(ctx, search) + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + if err != nil { + return nil, errors.WithMessage(err, "failed to execute search query") + } + + results := make([]DocumentMatch, min(ResultsPerPage, bleveResult.Total)) + var buf bytes.Buffer + for i, result := range bleveResult.Hits { + _, err = buf.WriteString(result.Fields["_data"].(string)) + if err != nil { + return nil, errors.WithMessage(err, "error fetching result data") + } + err = gob.NewDecoder(&buf).Decode(&results[i].Data) + if err != nil { + return nil, errors.WithMessagef(err, "error decoding gob data: %s", buf.String()) + } + buf.Reset() + } + + return &Result{ + SearchResult: bleveResult, + Hits: results, + }, nil + } +} diff --git a/internal/search/index_meta.go b/internal/search/index_meta.go deleted file mode 100644 index bb7e69f..0000000 --- a/internal/search/index_meta.go +++ /dev/null @@ -1,73 +0,0 @@ -package search - -import ( - "encoding/json" - "log/slog" - "os" - "searchix/internal/file" - - "github.com/pkg/errors" -) - -const CurrentSchemaVersion = 1 - -type IndexMeta struct { - path string - SchemaVersion int -} - -func createMeta(path string) (*IndexMeta, error) { - exists, err := file.Exists(path) - if err != nil { - return nil, errors.WithMessage(err, "could not check for existence of index metadata") - } - if exists { - return nil, errors.New("index metadata already exists") - } - - return &IndexMeta{ - path: path, - SchemaVersion: CurrentSchemaVersion, - }, nil -} - -func openMeta(path string) (*IndexMeta, error) { - j, err := os.ReadFile(path) - if err != nil { - return nil, errors.WithMessage(err, "could not open index metadata file") - } - var meta IndexMeta - err = json.Unmarshal(j, &meta) - if err != nil { - return nil, errors.WithMessage(err, "index metadata is corrupt, try replacing the index") - } - - meta.checkSchemaVersion() - - return &meta, nil -} - -func (i *IndexMeta) checkSchemaVersion() { - if i.SchemaVersion < CurrentSchemaVersion { - slog.Warn( - "Index schema version out of date, suggest re-indexing", - "schema_version", - i.SchemaVersion, - "latest_version", - CurrentSchemaVersion, - ) - } -} - -func (i *IndexMeta) Save() error { - j, err := json.Marshal(i) - if err != nil { - return errors.WithMessage(err, "could not prepare index metadata for saving") - } - err = os.WriteFile(i.path, j, 0o600) - if err != nil { - return errors.WithMessage(err, "could not save index metadata") - } - - return nil -} diff --git a/internal/search/indexer.go b/internal/search/indexer.go deleted file mode 100644 index a74189e..0000000 --- a/internal/search/indexer.go +++ /dev/null @@ -1,292 +0,0 @@ -package search - -import ( - "bytes" - "context" - "encoding/gob" - "io/fs" - "log" - "log/slog" - "os" - "path" - "searchix/internal/file" - "searchix/internal/options" - "slices" - - "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" - "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" - "github.com/blevesearch/bleve/v2/analysis/analyzer/web" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" - "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" - "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" - "github.com/blevesearch/bleve/v2/document" - "github.com/blevesearch/bleve/v2/mapping" - index "github.com/blevesearch/bleve_index_api" - "github.com/pkg/errors" -) - -type WriteIndex struct { - index bleve.Index - meta *IndexMeta -} - -func createIndexMapping() (mapping.IndexMapping, error) { - indexMapping := bleve.NewIndexMapping() - indexMapping.StoreDynamic = false - indexMapping.IndexDynamic = false - indexMapping.TypeField = "BleveType" - - textFieldMapping := bleve.NewTextFieldMapping() - textFieldMapping.Store = false - - descriptionFieldMapping := bleve.NewTextFieldMapping() - descriptionFieldMapping.Store = false - descriptionFieldMapping.Analyzer = web.Name - - err := indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ - "type": custom.Name, - "tokenizer": letter.Name, - "token_filters": []string{ - camelcase.Name, - }, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") - } - err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{ - "type": keyword.Name, - "tokenizer": letter.Name, - "token_filters": []string{ - camelcase.Name, - }, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") - } - err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{ - "type": keyword.Name, - "tokenizer": single.Name, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") - } - - keywordFieldMapping := bleve.NewKeywordFieldMapping() - keywordFieldMapping.Analyzer = "keyword_single" - - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "option_name" - nameMapping.IncludeTermVectors = true - nameMapping.Store = false - - nixValueMapping := bleve.NewDocumentStaticMapping() - nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) - nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) - - locFieldMapping := bleve.NewKeywordFieldMapping() - locFieldMapping.Analyzer = "loc" - locFieldMapping.IncludeTermVectors = true - locFieldMapping.Store = false - - optionMapping := bleve.NewDocumentStaticMapping() - - optionMapping.AddFieldMappingsAt("Name", keywordFieldMapping) - optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping) - optionMapping.AddFieldMappingsAt("Loc", locFieldMapping) - optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) - optionMapping.AddFieldMappingsAt("Description", textFieldMapping) - - optionMapping.AddSubDocumentMapping("Default", nixValueMapping) - optionMapping.AddSubDocumentMapping("Example", nixValueMapping) - - indexMapping.AddDocumentMapping("option", optionMapping) - - return indexMapping, nil -} - -func createIndex(indexPath string) (bleve.Index, error) { - indexMapping, err := createIndexMapping() - if err != nil { - return nil, err - } - idx, err := bleve.NewUsing( - indexPath, - indexMapping, - bleve.Config.DefaultIndexType, - bleve.Config.DefaultKVStore, - map[string]interface{}{ - "nosync": true, - }, - ) - if err != nil { - return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) - } - - return idx, nil -} - -const ( - indexBaseName = "index.bleve" - metaBaseName = "meta.json" -) - -var expectedDataFiles = []string{ - metaBaseName, - indexBaseName, - "sources", -} - -func deleteIndex(dataRoot string) error { - dir, err := os.ReadDir(dataRoot) - if err != nil { - return errors.WithMessagef(err, "could not read data directory %s", dataRoot) - } - remainingFiles := slices.DeleteFunc(dir, func(e fs.DirEntry) bool { - return slices.Contains(expectedDataFiles, e.Name()) - }) - if len(remainingFiles) > 0 { - return errors.Errorf( - "cowardly refusing to remove data directory %s as it contains unknown files: %v", - dataRoot, - remainingFiles, - ) - } - - err = os.RemoveAll(dataRoot) - if err != nil { - return errors.WithMessagef(err, "could not remove data directory %s", dataRoot) - } - - return nil -} - -func NewIndexer(dataRoot string, force bool) (*WriteIndex, error) { - var err error - bleve.SetLog(log.Default()) - - indexPath := path.Join(dataRoot, indexBaseName) - metaPath := path.Join(dataRoot, metaBaseName) - - exists, err := file.Exists(indexPath) - if err != nil { - return nil, errors.WithMessagef( - err, - "could not check if index exists at path %s", - indexPath, - ) - } - - var idx bleve.Index - var meta *IndexMeta - if !exists || force { - if force { - err = deleteIndex(dataRoot) - if err != nil { - return nil, err - } - } - idx, err = createIndex(indexPath) - if err != nil { - return nil, err - } - - meta, err = createMeta(metaPath) - if err != nil { - return nil, err - } - - err = meta.Save() - if err != nil { - return nil, err - } - } else { - idx, err = bleve.Open(indexPath) - if err != nil { - return nil, errors.WithMessagef(err, "could not open index at path %s", indexPath) - } - - meta, err = openMeta(metaPath) - if err != nil { - return nil, err - } - - } - - return &WriteIndex{ - idx, - meta, - }, nil -} - -func (i *WriteIndex) ImportOptions( - ctx context.Context, - objects <-chan *options.NixOption, -) <-chan error { - var err error - errs := make(chan error) - - go func() { - defer close(errs) - batch := i.index.NewBatch() - indexMapping := i.index.Mapping() - - outer: - for opt := range objects { - select { - case <-ctx.Done(): - slog.Debug("context cancelled") - - break outer - default: - } - - doc := document.NewDocument(opt.Source + "/" + opt.Name) - err = indexMapping.MapDocument(doc, opt) - if err != nil { - errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Name) - - continue - } - - var data bytes.Buffer - enc := gob.NewEncoder(&data) - err = enc.Encode(opt) - if err != nil { - errs <- errors.WithMessage(err, "could not store option in search index") - - continue - } - field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField) - newDoc := doc.AddField(field) - - // slog.Debug("adding option to index", "name", opt.Name) - err = batch.IndexAdvanced(newDoc) - - if err != nil { - errs <- errors.WithMessagef(err, "could not index option %s", opt.Name) - - continue - } - } - - size := batch.Size() - slog.Debug("flushing batch", "size", size) - - err := i.index.Batch(batch) - if err != nil { - errs <- errors.WithMessagef(err, "could not flush batch") - } - }() - - return errs -} - -func (i *WriteIndex) Close() error { - err := i.index.Close() - if err != nil { - return errors.WithMessagef(err, "could not close index") - } - - return nil -} diff --git a/internal/search/search.go b/internal/search/search.go deleted file mode 100644 index c930f15..0000000 --- a/internal/search/search.go +++ /dev/null @@ -1,123 +0,0 @@ -package search - -import ( - "bytes" - "context" - "encoding/gob" - "path" - "searchix/internal/options" - - "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/search" - "github.com/pkg/errors" -) - -const ResultsPerPage = 20 - -type DocumentMatch struct { - search.DocumentMatch - Data options.NixOption -} - -type Result struct { - *bleve.SearchResult - Hits []DocumentMatch -} - -type ReadIndex struct { - index bleve.Index - meta *IndexMeta -} - -func Open(dataRoot string) (*ReadIndex, error) { - indexPath := path.Join(dataRoot, indexBaseName) - metaPath := path.Join(dataRoot, metaBaseName) - - idx, err := bleve.Open(indexPath) - if err != nil { - return nil, errors.WithMessagef(err, "unable to open index at path %s", indexPath) - } - - meta, err := openMeta(metaPath) - if err != nil { - return nil, errors.WithMessagef(err, "unable to open metadata at path %s", metaPath) - } - - return &ReadIndex{ - idx, - meta, - }, nil -} - -func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) { - query := bleve.NewTermQuery(name) - query.SetField("Source") - search := bleve.NewSearchRequest(query) - - result, err := index.index.SearchInContext(ctx, search) - - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - if err != nil { - return nil, errors.WithMessagef( - err, - "failed to execute search to find source %s in index", - name, - ) - } - } - - return result, nil -} - -func (index *ReadIndex) Search( - ctx context.Context, - source string, - keyword string, - from uint64, -) (*Result, error) { - sourceQuery := bleve.NewTermQuery(source) - userQuery := bleve.NewMatchQuery(keyword) - userQuery.Analyzer = "option_name" - - query := bleve.NewConjunctionQuery(sourceQuery, userQuery) - - search := bleve.NewSearchRequest(query) - search.Size = ResultsPerPage - search.Fields = []string{"_data"} - - if from != 0 { - search.From = int(from) - } - - bleveResult, err := index.index.SearchInContext(ctx, search) - select { - case <-ctx.Done(): - return nil, ctx.Err() - default: - if err != nil { - return nil, errors.WithMessage(err, "failed to execute search query") - } - - results := make([]DocumentMatch, min(ResultsPerPage, bleveResult.Total)) - var buf bytes.Buffer - for i, result := range bleveResult.Hits { - _, err = buf.WriteString(result.Fields["_data"].(string)) - if err != nil { - return nil, errors.WithMessage(err, "error fetching result data") - } - err = gob.NewDecoder(&buf).Decode(&results[i].Data) - if err != nil { - return nil, errors.WithMessagef(err, "error decoding gob data: %s", buf.String()) - } - buf.Reset() - } - - return &Result{ - SearchResult: bleveResult, - Hits: results, - }, nil - } -} diff --git a/internal/server/mux.go b/internal/server/mux.go index 9d3b29a..582d154 100644 --- a/internal/server/mux.go +++ b/internal/server/mux.go @@ -16,8 +16,8 @@ import ( "searchix/frontend" "searchix/internal/config" + search "searchix/internal/index" "searchix/internal/options" - "searchix/internal/search" "github.com/blevesearch/bleve/v2" "github.com/getsentry/sentry-go" @@ -63,15 +63,12 @@ func applyDevModeOverrides(config *config.Config) { ) } -func NewMux(config *config.Config, liveReload bool) (*http.ServeMux, error) { - slog.Debug("loading index") - index, err := search.Open(config.DataPath) - slog.Debug("loaded index") - if err != nil { - log.Fatalf("could not open search index, error: %#v", err) - } - - err = sentry.Init(sentry.ClientOptions{ +func NewMux( + config *config.Config, + index *search.ReadIndex, + liveReload bool, +) (*http.ServeMux, error) { + err := sentry.Init(sentry.ClientOptions{ EnableTracing: true, TracesSampleRate: 1.0, Dsn: config.Web.SentryDSN, diff --git a/internal/server/server.go b/internal/server/server.go index 77163d3..bb0a6ad 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -7,6 +7,7 @@ import ( "net" "net/http" "searchix/internal/config" + "searchix/internal/index" "time" "github.com/pkg/errors" @@ -16,8 +17,8 @@ type Server struct { *http.Server } -func New(conf *config.Config, liveReload bool) (*Server, error) { - mux, err := NewMux(conf, liveReload) +func New(conf *config.Config, index *index.ReadIndex, liveReload bool) (*Server, error) { + mux, err := NewMux(conf, index, liveReload) if err != nil { return nil, err } diff --git a/justfile b/justfile index 8883577..7a3988c 100644 --- a/justfile +++ b/justfile @@ -21,10 +21,7 @@ precommit: nix-build -A pre-commit-check dev: - watchexec -e go -r wgo run -exit ./serve/ --live --config config.toml - -index: - wgo run -exit ./import/ --config config.toml + watchexec -e go -r wgo run -exit ./ --live --config config.toml reindex: - wgo run -exit ./import/ --config config.toml --replace + wgo run -exit . --config config.toml --replace diff --git a/nix/modules/default.nix b/nix/modules/default.nix index 62cc5c0..e082a29 100644 --- a/nix/modules/default.nix +++ b/nix/modules/default.nix @@ -70,13 +70,8 @@ in description = "Home directory for searchix user"; }; - dates = mkOption { - type = types.singleLineStr; - default = "04:00"; - example = "weekly"; - }; - settings = mkOption { + default = { }; type = types.submodule { freeformType = settingsFormat.type; options = { @@ -115,9 +110,9 @@ in }; sentryDSN = mkOption { - type = with types; nullOr str; + type = types.str; description = "Optionally enable sentry to track errors."; - default = null; + default = ""; }; }; }; @@ -127,26 +122,36 @@ in type = types.submodule { freeformType = settingsFormat.type; - importTimeout = mkOption { - type = types.str; - default = "30m"; - description = '' - Maximum time to wait for all import jobs. - May need to be increased based on the number of sources. - ''; - }; + options = { + timeout = mkOption { + type = types.str; + default = "30m"; + description = '' + Maximum time to wait for all import jobs. + May need to be increased based on the number of sources. + ''; + }; + + updateAt = mkOption { + type = types.strMatching "[[:digit:]]{2}:[[:digit:]]{2}:[[:digit:]]{2}"; + default = "04:00:00"; + example = "02:00:00"; + description = "Time of day to fetch and import new options."; + }; - sources = mkOption { - type = with types; - attrsOf (submodule (import ./source-options.nix { - inherit cfg settingsFormat; - })); - default = { - nixos.enable = true; - darwin.enable = false; - home-manager.enable = false; + sources = mkOption { + type = with types; + attrsOf (submodule (import ./source-options.nix { + inherit cfg settingsFormat; + })); + default = { + nixos.enable = true; + darwin.enable = false; + home-manager.enable = false; + }; + description = "Declarative specification of options sources for searchix."; }; - description = "Declarative specification of options sources for searchix."; + }; }; }; @@ -158,37 +163,12 @@ in }; config = mkIf cfg.enable { - systemd.services.searchix-importer = { - description = "Searchix option importer"; - conflicts = [ "searchix-web.service" ]; - path = with pkgs; [ nix ]; - serviceConfig = defaultServiceConfig // { - ExecStart = "${package}/bin/import --config ${(settingsFormat.generate "searchix-config.toml" cfg.settings)}"; - Type = "oneshot"; - - RestartSec = 10; - RestartSteps = 5; - RestartMaxDelaySec = "5 min"; - }; - - startAt = cfg.dates; - }; - - systemd.timers.searchix-importer = { - timerConfig = { - Persistent = true; - RandomizedDelaySec = 1800; - }; - }; - - systemd.services.searchix-web = { + systemd.services.searchix = { description = "Searchix Nix option search"; - after = [ "searchix-importer.service" ]; - wants = [ "searchix-importer.service" ]; wantedBy = [ "multi-user.target" ]; serviceConfig = defaultServiceConfig // { - ExecStart = "${package}/bin/serve --config ${(settingsFormat.generate "searchix-config.toml" cfg.settings)}"; - } // lib.optionalAttrs (cfg.port < 1024) { + ExecStart = "${package}/bin/searchix --config ${(settingsFormat.generate "searchix-config.toml" cfg.settings)}"; + } // lib.optionalAttrs (cfg.settings.web.port < 1024) { AmbientCapabilities = [ "CAP_NET_BIND_SERVICE" ]; CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ]; }; diff --git a/nix/package.nix b/nix/package.nix index 8aff1c8..8e1cf54 100644 --- a/nix/package.nix +++ b/nix/package.nix @@ -21,8 +21,7 @@ buildGoApplication { (unions [ ../go.mod ../go.sum - ../serve - ../import + ../searchix.go ../internal ../frontend ]) diff --git a/searchix.go b/searchix.go new file mode 100644 index 0000000..9a0b16e --- /dev/null +++ b/searchix.go @@ -0,0 +1,116 @@ +package main + +import ( + "flag" + "log" + "log/slog" + "os" + "os/signal" + "sync" + "time" + + "searchix/internal/config" + "searchix/internal/importer" + "searchix/internal/index" + "searchix/internal/server" + + "github.com/pelletier/go-toml/v2" +) + +var ( + configFile = flag.String("config", "config.toml", "config file to use") + liveReload = flag.Bool("live", false, "whether to enable live reloading (development)") + replace = flag.Bool("replace", false, "whether to replace existing database, if it exists") +) + +func nextOccurrenceOfLocalTime(t toml.LocalTime) time.Time { + now := time.Now() + dayTime := t + nextRun := time.Date( + now.Year(), + now.Month(), + now.Day(), + dayTime.Hour, + dayTime.Minute, + dayTime.Second, + 0, + time.Local, + ) + if nextRun.Before(now) { + return nextRun.AddDate(0, 0, 1) + } + + return nextRun +} + +func main() { + flag.Parse() + + conf, err := config.GetConfig(*configFile) + if err != nil { + log.Panicf("error parsing configuration file: %v", err) + } + slog.SetLogLoggerLevel(conf.LogLevel) + log.SetFlags(log.LstdFlags | log.Lmsgprefix) + log.SetPrefix("searchix: ") + + read, write, exists, err := index.OpenOrCreate(conf.DataPath, *replace) + if err != nil { + log.Fatalf("Failed to open or create index: %v", err) + } + + if !exists { + err = importer.Start(conf, write, *replace) + if err != nil { + log.Fatalf("Failed to build index: %v", err) + } + } + + c := make(chan os.Signal, 2) + signal.Notify(c, os.Interrupt) + sv, err := server.New(conf, read, *liveReload) + if err != nil { + log.Fatalf("error setting up server: %v", err) + } + wg := &sync.WaitGroup{} + wg.Add(1) + go func() { + defer wg.Done() + sig := <-c + log.Printf("signal captured: %v", sig) + <-sv.Stop() + slog.Debug("server stopped") + }() + + go func() { + nextRun := nextOccurrenceOfLocalTime(conf.Importer.UpdateAt) + for { + slog.Debug("scheduling next run", "next-run", nextRun) + <-time.After(time.Until(nextRun)) + wg.Add(1) + slog.Info("updating index") + err = importer.Start(conf, write, false) + wg.Done() + if err != nil { + slog.Warn("error updating index", "error", err) + } else { + slog.Info("update complete") + } + nextRun = nextRun.AddDate(0, 0, 1) + } + }() + + sErr := make(chan error) + wg.Add(1) + go func() { + defer wg.Done() + sErr <- sv.Start() + }() + + err = <-sErr + if err != nil { + // Error starting or closing listener: + log.Fatalf("error: %v", err) + } + wg.Wait() +} diff --git a/serve/main.go b/serve/main.go deleted file mode 100644 index 709d340..0000000 --- a/serve/main.go +++ /dev/null @@ -1,60 +0,0 @@ -package main - -import ( - "flag" - "log" - "log/slog" - "os" - "os/signal" - "sync" - - "searchix/internal/config" - "searchix/internal/server" -) - -var ( - liveReload = flag.Bool("live", false, "whether to enable live reloading (development)") - configFile = flag.String("config", "config.toml", "config file to use") -) - -func main() { - flag.Parse() - - conf, err := config.GetConfig(*configFile) - if err != nil { - log.Panicf("error parsing configuration file: %v", err) - } - slog.SetLogLoggerLevel(conf.LogLevel) - log.SetFlags(log.LstdFlags | log.Lmsgprefix) - log.SetPrefix("searchix: ") - - c := make(chan os.Signal, 2) - signal.Notify(c, os.Interrupt) - sv, err := server.New(conf, *liveReload) - if err != nil { - log.Fatalf("error setting up server: %v", err) - } - wg := &sync.WaitGroup{} - wg.Add(1) - go func() { - defer wg.Done() - sig := <-c - log.Printf("signal captured: %v", sig) - <-sv.Stop() - slog.Debug("server stopped") - }() - - sErr := make(chan error) - wg.Add(1) - go func() { - defer wg.Done() - sErr <- sv.Start() - }() - - err = <-sErr - if err != nil { - // Error starting or closing listener: - log.Fatalf("error: %v", err) - } - wg.Wait() -} -- cgit 1.4.1