From 6b40e0c0fa90f11be14a93f1d6275779fd645cac Mon Sep 17 00:00:00 2001
From: Alan Pearce
Date: Mon, 13 May 2024 21:50:14 +0200
Subject: refactor: combine import and web server into one binary

---
 internal/index/index_meta.go |  89 +++++++++++
 internal/index/indexer.go    | 322 +++++++++++++++++++++++++++++++++++++++++++
 internal/index/search.go     | 116 +++++++++++++++
 3 files changed, 527 insertions(+)
 create mode 100644 internal/index/index_meta.go
 create mode 100644 internal/index/indexer.go
 create mode 100644 internal/index/search.go

(limited to 'internal/index')

diff --git a/internal/index/index_meta.go b/internal/index/index_meta.go
new file mode 100644
index 0000000..e24cd3b
--- /dev/null
+++ b/internal/index/index_meta.go
@@ -0,0 +1,89 @@
+package index
+
+import (
+	"encoding/json"
+	"log/slog"
+	"os"
+	"searchix/internal/file"
+
+	"github.com/pkg/errors"
+)
+
+// CurrentSchemaVersion is the schema version recorded in newly created index
+// metadata and compared against when existing metadata is opened.
+const CurrentSchemaVersion = 1
+
+// Meta is the index metadata persisted as JSON next to the search index.
+type Meta struct {
+	// path is the metadata file location. It is unexported, so encoding/json
+	// neither writes nor restores it.
+	path          string
+	SchemaVersion int
+}
+
+// createMeta returns fresh metadata bound to path, stamped with
+// CurrentSchemaVersion. It does not write anything; call Save to persist.
+// It returns an error if a file already exists at path.
+func createMeta(path string) (*Meta, error) {
+	exists, err := file.Exists(path)
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not check for existence of index metadata")
+	}
+	if exists {
+		return nil, errors.New("index metadata already exists")
+	}
+
+	return &Meta{
+		path:          path,
+		SchemaVersion: CurrentSchemaVersion,
+	}, nil
+}
+
+// openMeta reads and decodes the metadata file at path, logging a warning if
+// its schema version is older than CurrentSchemaVersion.
+func openMeta(path string) (*Meta, error) {
+	j, err := os.ReadFile(path)
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not open index metadata file")
+	}
+	// Seed path before decoding: it is not part of the JSON document, and
+	// without it a later Save would try to write to an empty file name.
+	meta := Meta{path: path}
+	err = json.Unmarshal(j, &meta)
+	if err != nil {
+		return nil, errors.WithMessage(err, "index metadata is corrupt, try replacing the index")
+	}
+
+	meta.checkSchemaVersion()
+
+	return &meta, nil
+}
+
+// checkSchemaVersion logs a warning when the stored schema version is older
+// than CurrentSchemaVersion. It never fails.
+func (i *Meta) checkSchemaVersion() {
+	if i.SchemaVersion < CurrentSchemaVersion {
+		slog.Warn(
+			"Index schema version out of date, suggest re-indexing",
+			"schema_version",
+			i.SchemaVersion,
+			"latest_version",
+			CurrentSchemaVersion,
+		)
+	}
+}
+
+// Save encodes the metadata as JSON and writes it to the path it was created
+// or opened with, using file mode 0o600.
+func (i *Meta) Save() error {
+	j, err := json.Marshal(i)
+	if err != nil {
+		return errors.WithMessage(err, "could not prepare index metadata for saving")
+	}
+	err = os.WriteFile(i.path, j, 0o600)
+	if err != nil {
+		return errors.WithMessage(err, "could not save index metadata")
+	}
+
+	return nil
+}
diff --git a/internal/index/indexer.go b/internal/index/indexer.go
new file mode 100644
index 0000000..63cf1a6
--- /dev/null
+++ b/internal/index/indexer.go
@@ -0,0 +1,322 @@
+package index
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"io/fs"
+	"log"
+	"log/slog"
+	"os"
+	"path"
+	"searchix/internal/file"
+	"searchix/internal/options"
+	"slices"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
+	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
+	"github.com/blevesearch/bleve/v2/document"
+	"github.com/blevesearch/bleve/v2/mapping"
+	indexAPI "github.com/blevesearch/bleve_index_api"
+	"github.com/pkg/errors"
+)
+
+// WriteIndex is the write-side handle on the search index and its metadata.
+type WriteIndex struct {
+	index bleve.Index
+	meta  *Meta
+}
+
+// createIndexMapping builds the bleve index mapping for documents of type
+// "option" and registers the custom analysers that mapping refers to.
+func createIndexMapping() (mapping.IndexMapping, error) {
+	indexMapping := bleve.NewIndexMapping()
+	indexMapping.StoreDynamic = false
+	indexMapping.IndexDynamic = false
+	indexMapping.TypeField = "BleveType"
+
+	textFieldMapping := bleve.NewTextFieldMapping()
+	textFieldMapping.Store = false
+
+	// NOTE(review): descriptionFieldMapping is configured but never attached
+	// to a field; "Description" below uses textFieldMapping. Confirm intent.
+	descriptionFieldMapping := bleve.NewTextFieldMapping()
+	descriptionFieldMapping.Store = false
+	descriptionFieldMapping.Analyzer = web.Name
+
+	err := indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{
+		"type":      custom.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": single.Name,
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+
+	keywordFieldMapping := bleve.NewKeywordFieldMapping()
+	keywordFieldMapping.Analyzer = "keyword_single"
+
+	// NOTE(review): nameMapping is likewise never attached; "Name" below uses
+	// keywordFieldMapping. Confirm intent.
+	nameMapping := bleve.NewTextFieldMapping()
+	nameMapping.Analyzer = "option_name"
+	nameMapping.IncludeTermVectors = true
+	nameMapping.Store = false
+
+	nixValueMapping := bleve.NewDocumentStaticMapping()
+	nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping)
+	nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping)
+
+	locFieldMapping := bleve.NewKeywordFieldMapping()
+	locFieldMapping.Analyzer = "loc"
+	locFieldMapping.IncludeTermVectors = true
+	locFieldMapping.Store = false
+
+	optionMapping := bleve.NewDocumentStaticMapping()
+
+	optionMapping.AddFieldMappingsAt("Name", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Loc", locFieldMapping)
+	optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping)
+	optionMapping.AddFieldMappingsAt("Description", textFieldMapping)
+
+	optionMapping.AddSubDocumentMapping("Default", nixValueMapping)
+	optionMapping.AddSubDocumentMapping("Example", nixValueMapping)
+
+	indexMapping.AddDocumentMapping("option", optionMapping)
+
+	return indexMapping, nil
+}
+
+// createIndex creates a new bleve index at indexPath using the mapping from
+// createIndexMapping and bleve's default index type and KV store.
+func createIndex(indexPath string) (bleve.Index, error) {
+	indexMapping, err := createIndexMapping()
+	if err != nil {
+		return nil, err
+	}
+	idx, err := bleve.NewUsing(
+		indexPath,
+		indexMapping,
+		bleve.Config.DefaultIndexType,
+		bleve.Config.DefaultKVStore,
+		map[string]interface{}{
+			"nosync": true,
+		},
+	)
+	if err != nil {
+		return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath)
+	}
+
+	return idx, nil
+}
+
+const (
+	indexBaseName = "index.bleve"
+	metaBaseName  = "meta.json"
+)
+
+// expectedDataFiles lists the only entries deleteIndex tolerates in a data
+// directory before removing it.
+var expectedDataFiles = []string{
+	metaBaseName,
+	indexBaseName,
+	"sources",
+}
+
+// deleteIndex removes the whole data directory, refusing to do so when it
+// contains any entry not named in expectedDataFiles. A data directory that
+// does not exist is treated as already deleted.
+func deleteIndex(dataRoot string) error {
+	dir, err := os.ReadDir(dataRoot)
+	if err != nil {
+		// Nothing to delete: a forced rebuild on a fresh install must not fail.
+		if os.IsNotExist(err) {
+			return nil
+		}
+
+		return errors.WithMessagef(err, "could not read data directory %s", dataRoot)
+	}
+	remainingFiles := slices.DeleteFunc(dir, func(e fs.DirEntry) bool {
+		return slices.Contains(expectedDataFiles, e.Name())
+	})
+	if len(remainingFiles) > 0 {
+		return errors.Errorf(
+			"cowardly refusing to remove data directory %s as it contains unknown files: %v",
+			dataRoot,
+			remainingFiles,
+		)
+	}
+
+	err = os.RemoveAll(dataRoot)
+	if err != nil {
+		return errors.WithMessagef(err, "could not remove data directory %s", dataRoot)
+	}
+
+	return nil
+}
+
+// OpenOrCreate opens the index under dataRoot, or creates it (with fresh
+// metadata) when it does not exist yet. With force set, the data directory is
+// deleted and the index is created from scratch.
+//
+// The returned bool reports whether an index existed at the expected path
+// before the call. Both returned handles share the same underlying index.
+func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, error) {
+	bleve.SetLog(log.Default())
+
+	indexPath := path.Join(dataRoot, indexBaseName)
+	metaPath := path.Join(dataRoot, metaBaseName)
+
+	exists, err := file.Exists(indexPath)
+	if err != nil {
+		return nil, nil, exists, errors.WithMessagef(
+			err,
+			"could not check if index exists at path %s",
+			indexPath,
+		)
+	}
+
+	var idx bleve.Index
+	var meta *Meta
+	if !exists || force {
+		if force {
+			err = deleteIndex(dataRoot)
+			if err != nil {
+				return nil, nil, exists, err
+			}
+		}
+		idx, err = createIndex(indexPath)
+		if err != nil {
+			return nil, nil, exists, err
+		}
+
+		meta, err = createMeta(metaPath)
+		if err == nil {
+			err = meta.Save()
+		}
+	} else {
+		idx, err = bleve.Open(indexPath)
+		if err != nil {
+			return nil, nil, exists, errors.WithMessagef(err, "could not open index at path %s", indexPath)
+		}
+
+		meta, err = openMeta(metaPath)
+	}
+	if err != nil {
+		// The index is already open here; close it rather than leak the
+		// handle. The metadata error is the one worth reporting.
+		_ = idx.Close()
+
+		return nil, nil, exists, err
+	}
+
+	return &ReadIndex{
+			idx,
+			meta,
+		},
+		&WriteIndex{
+			idx,
+			meta,
+		},
+		exists,
+		nil
+}
+
+// ImportOptions indexes every option received on objects into a single batch
+// and flushes that batch once objects is closed or ctx is cancelled.
+//
+// Errors are reported on the returned channel, which is unbuffered and closed
+// when the import goroutine finishes; the caller must drain it. An option that
+// fails to map, encode or index is reported and skipped.
+func (i *WriteIndex) ImportOptions(
+	ctx context.Context,
+	objects <-chan *options.NixOption,
+) <-chan error {
+	errs := make(chan error)
+
+	go func() {
+		defer close(errs)
+		batch := i.index.NewBatch()
+		indexMapping := i.index.Mapping()
+
+	outer:
+		for opt := range objects {
+			select {
+			case <-ctx.Done():
+				slog.Debug("context cancelled")
+
+				break outer
+			default:
+			}
+
+			doc := document.NewDocument(opt.Source + "/" + opt.Name)
+			if err := indexMapping.MapDocument(doc, opt); err != nil {
+				errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Name)
+
+				continue
+			}
+
+			// Store the gob-encoded option in the document so Search can
+			// return it without consulting another data source.
+			var data bytes.Buffer
+			if err := gob.NewEncoder(&data).Encode(opt); err != nil {
+				errs <- errors.WithMessage(err, "could not store option in search index")
+
+				continue
+			}
+			field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), indexAPI.StoreField)
+			newDoc := doc.AddField(field)
+
+			// slog.Debug("adding option to index", "name", opt.Name)
+			if err := batch.IndexAdvanced(newDoc); err != nil {
+				errs <- errors.WithMessagef(err, "could not index option %s", opt.Name)
+
+				continue
+			}
+		}
+
+		size := batch.Size()
+		slog.Debug("flushing batch", "size", size)
+
+		if err := i.index.Batch(batch); err != nil {
+			errs <- errors.WithMessagef(err, "could not flush batch")
+		}
+	}()
+
+	return errs
+}
+
+// Close closes the underlying index.
+func (i *WriteIndex) Close() error {
+	err := i.index.Close()
+	if err != nil {
+		return errors.WithMessagef(err, "could not close index")
+	}
+
+	return nil
+}
diff --git a/internal/index/search.go b/internal/index/search.go
new file mode 100644
index 0000000..d069510
--- /dev/null
+++ b/internal/index/search.go
@@ -0,0 +1,116 @@
+package index
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"searchix/internal/options"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/search"
+	"github.com/pkg/errors"
+)
+
+// ResultsPerPage is the maximum number of hits requested by one Search call.
+const ResultsPerPage = 20
+
+// DocumentMatch pairs a bleve hit with the option decoded from its stored data.
+type DocumentMatch struct {
+	search.DocumentMatch
+	Data options.NixOption
+}
+
+// Result is a bleve search result whose Hits carry the decoded options.
+type Result struct {
+	*bleve.SearchResult
+	Hits []DocumentMatch
+}
+
+// ReadIndex is the read-side handle on the search index and its metadata.
+type ReadIndex struct {
+	index bleve.Index
+	meta  *Meta
+}
+
+// GetSource runs a term query for name against the Source field and returns
+// the raw bleve result. It returns ctx.Err() if ctx is done once the search
+// returns.
+func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) {
+	query := bleve.NewTermQuery(name)
+	query.SetField("Source")
+	req := bleve.NewSearchRequest(query)
+
+	result, err := index.index.SearchInContext(ctx, req)
+
+	// A done context takes precedence over whatever error the search returned.
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+	if err != nil {
+		return nil, errors.WithMessagef(
+			err,
+			"failed to execute search to find source %s in index",
+			name,
+		)
+	}
+
+	return result, nil
+}
+
+// Search returns one page of options from source that match keyword, starting
+// at offset from. Each hit's stored gob data is decoded into Data.
+// It returns ctx.Err() if ctx is done once the search returns.
+func (index *ReadIndex) Search(
+	ctx context.Context,
+	source string,
+	keyword string,
+	from uint64,
+) (*Result, error) {
+	sourceQuery := bleve.NewTermQuery(source)
+	// Pin the source filter to the Source field, as GetSource does, rather
+	// than leaving the term to be looked up in the mapping's default field.
+	sourceQuery.SetField("Source")
+	userQuery := bleve.NewMatchQuery(keyword)
+	userQuery.Analyzer = "option_name"
+
+	query := bleve.NewConjunctionQuery(sourceQuery, userQuery)
+
+	req := bleve.NewSearchRequest(query)
+	req.Size = ResultsPerPage
+	req.Fields = []string{"_data"}
+
+	if from != 0 {
+		req.From = int(from)
+	}
+
+	bleveResult, err := index.index.SearchInContext(ctx, req)
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+	if err != nil {
+		return nil, errors.WithMessage(err, "failed to execute search query")
+	}
+
+	// Size by the hits actually returned: Total counts matches across all
+	// pages, so a final page can hold fewer than min(ResultsPerPage, Total).
+	results := make([]DocumentMatch, len(bleveResult.Hits))
+	for i, hit := range bleveResult.Hits {
+		data, ok := hit.Fields["_data"].(string)
+		if !ok {
+			return nil, errors.Errorf("error fetching result data: hit %s has no stored _data", hit.ID)
+		}
+		err = gob.NewDecoder(bytes.NewBufferString(data)).Decode(&results[i].Data)
+		if err != nil {
+			return nil, errors.WithMessagef(err, "error decoding gob data: %s", data)
+		}
+	}
+
+	return &Result{
+		SearchResult: bleveResult,
+		Hits:         results,
+	}, nil
+}
--
cgit 1.4.1