diff options
Diffstat (limited to 'internal/search')
-rw-r--r-- | internal/search/indexer.go | 183 | ||||
-rw-r--r-- | internal/search/search.go | 150 |
2 files changed, 219 insertions, 114 deletions
diff --git a/internal/search/indexer.go b/internal/search/indexer.go new file mode 100644 index 0000000..b0e57d4 --- /dev/null +++ b/internal/search/indexer.go @@ -0,0 +1,183 @@ +package search + +import ( + "bytes" + "context" + "encoding/gob" + "log" + "log/slog" + "path" + "searchix/internal/options" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/analyzer/web" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" + "github.com/blevesearch/bleve/v2/document" + "github.com/blevesearch/bleve/v2/mapping" + index "github.com/blevesearch/bleve_index_api" + "github.com/pkg/errors" +) + +type WriteIndex struct { + index bleve.Index + indexMapping *mapping.IndexMappingImpl +} + +func NewIndexer(dir string) (*WriteIndex, error) { + var err error + bleve.SetLog(log.Default()) + + indexPath := path.Join(dir, indexFilename) + + indexMapping := bleve.NewIndexMapping() + indexMapping.StoreDynamic = false + indexMapping.IndexDynamic = false + indexMapping.TypeField = "BleveType" + + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = false + + descriptionFieldMapping := bleve.NewTextFieldMapping() + descriptionFieldMapping.Store = false + descriptionFieldMapping.Analyzer = web.Name + + err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ + "type": custom.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": single.Name, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + + keywordFieldMapping := bleve.NewKeywordFieldMapping() + keywordFieldMapping.Analyzer = "keyword_single" + + nameMapping := bleve.NewTextFieldMapping() + nameMapping.Analyzer = "option_name" + nameMapping.IncludeTermVectors = true + nameMapping.Store = false + + nixValueMapping := bleve.NewDocumentStaticMapping() + nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) + nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) + + locFieldMapping := bleve.NewKeywordFieldMapping() + locFieldMapping.Analyzer = "loc" + locFieldMapping.IncludeTermVectors = true + locFieldMapping.Store = false + + optionMapping := bleve.NewDocumentStaticMapping() + + optionMapping.AddFieldMappingsAt("Option", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Loc", locFieldMapping) + optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) + optionMapping.AddFieldMappingsAt("Description", textFieldMapping) + + optionMapping.AddSubDocumentMapping("Default", nixValueMapping) + optionMapping.AddSubDocumentMapping("Example", nixValueMapping) + + indexMapping.AddDocumentMapping("option", optionMapping) + + idx, err := bleve.New(indexPath, indexMapping) + if err != nil { + return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) + } + + return &WriteIndex{ + idx, + indexMapping, + }, nil +} + +func (i *WriteIndex) ImportOptions(ctx context.Context, objects <-chan *options.NixOption) <-chan error { + var err error + errs := make(chan error) + + go func() { + defer close(errs) + batch := i.index.NewBatch() + + outer: + for opt := range objects { + select { + case <-ctx.Done(): + slog.Debug("context cancelled") + + break outer + default: + } + + doc := document.NewDocument(opt.Source + "/" + opt.Option) + err = i.indexMapping.MapDocument(doc, opt) + if err != nil { + errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Option) + + continue + } + + var data bytes.Buffer + enc := gob.NewEncoder(&data) + err = enc.Encode(opt) + if err != nil { + errs <- errors.WithMessage(err, "could not store option in search index") + + continue + } + field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField) + newDoc := doc.AddField(field) + + // slog.Debug("adding option to index", "name", opt.Option) + err = batch.IndexAdvanced(newDoc) + + if err != nil { + errs <- errors.WithMessagef(err, "could not index option %s", opt.Option) + + continue + } + } + + size := batch.Size() + slog.Debug("flushing batch", "size", size) + + err := i.index.Batch(batch) + if err != nil { + errs <- errors.WithMessagef(err, "could not flush batch") + } + }() + + return errs +} + +func (i *WriteIndex) Close() error { + err := i.index.Close() + if err != nil { + return errors.WithMessagef(err, "could not close index") + } + + return nil +} diff --git a/internal/search/search.go b/internal/search/search.go index 97d8404..92afdfb 100644 --- a/internal/search/search.go +++ b/internal/search/search.go @@ -4,151 +4,73 @@ import ( "bytes" "context" "encoding/gob" - "log" - "os" "path" "searchix/internal/options" - "github.com/bcicen/jstream" "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" - "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" - "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/search" - index "github.com/blevesearch/bleve_index_api" - "github.com/mitchellh/mapstructure" "github.com/pkg/errors" ) const ResultsPerPage = 20 +const indexFilename = "index.bleve" -type DocumentMatch[T options.NixOption] struct { +type DocumentMatch struct { search.DocumentMatch - Data T + Data options.NixOption } -type Result[T options.NixOption] struct { +type Result struct { *bleve.SearchResult - Hits []DocumentMatch[T] + Hits []DocumentMatch } -type Index[T options.NixOption] struct { +type ReadIndex struct { index bleve.Index } -func New[T options.NixOption](kind string) (*Index[T], error) { - var err error - bleve.SetLog(log.Default()) +func Open(dir string) (*ReadIndex, error) { + indexPath := path.Join(dir, indexFilename) - indexMapping := bleve.NewIndexMapping() - - textFieldMapping := bleve.NewTextFieldMapping() - textFieldMapping.Store = false - - descriptionFieldMapping := bleve.NewTextFieldMapping() - descriptionFieldMapping.Store = false - descriptionFieldMapping.Analyzer = "web" - - err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ - "type": custom.Name, - "tokenizer": letter.Name, - "token_filters": []string{ - camelcase.Name, - }, - }) + idx, err := bleve.Open(indexPath) if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") + return nil, errors.WithMessagef(err, "unable to open index at path %s", indexPath) } - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "option_name" - nameMapping.IncludeTermVectors = true - nameMapping.Store = false - - nixValueMapping := bleve.NewDocumentStaticMapping() - nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) - nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) - - optionMapping := bleve.NewDocumentStaticMapping() - - optionMapping.AddFieldMappingsAt("Option", nameMapping) - optionMapping.AddFieldMappingsAt("Loc", bleve.NewKeywordFieldMapping()) - optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) - optionMapping.AddFieldMappingsAt("Description", textFieldMapping) - - optionMapping.AddSubDocumentMapping("Default", nixValueMapping) - optionMapping.AddSubDocumentMapping("Example", nixValueMapping) - - indexMapping.AddDocumentMapping("option", optionMapping) - - idx, err := bleve.NewMemOnly(indexMapping) - // index, err = bleve.New(path.Join(cfg.DataPath, const indexFilename = "index.bleve"), indexMapping) - - if err != nil { - return nil, errors.WithMessage(err, "error opening index") - } - batch := idx.NewBatch() - - jsonFile, err := os.Open(path.Join("data", "processed", kind+".json")) - if err != nil { - return nil, errors.WithMessage(err, "error opening json file") - } - - dec := jstream.NewDecoder(jsonFile, 1) - var opt options.NixOption - ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ - ErrorUnused: true, - ZeroFields: true, - Result: &opt, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not create struct decoder") - } - for mv := range dec.Stream() { - opt = options.NixOption{} - orig := mv.Value.(map[string]interface{}) - err := ms.Decode(orig) // stores in opt - if err != nil { - return nil, errors.WithMessagef(err, "could not decode value: %+v", orig) - } - doc := document.NewDocument(opt.Option) - err = indexMapping.MapDocument(doc, opt) - if err != nil { - return nil, errors.WithMessagef(err, "could not map document for option: %s", opt.Option) - } + return &ReadIndex{ + idx, + }, nil +} - var data bytes.Buffer - enc := gob.NewEncoder(&data) - err = enc.Encode(opt) - if err != nil { - return nil, errors.WithMessage(err, "could not store option in search index") - } - field := document.NewTextFieldWithIndexingOptions("data", nil, data.Bytes(), index.StoreField) - newDoc := doc.AddField(field) +func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) { + query := bleve.NewTermQuery(name) + query.SetField("Source") + search := bleve.NewSearchRequest(query) - err = batch.IndexAdvanced(newDoc) + result, err := index.index.SearchInContext(ctx, search) + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: if err != nil { - return nil, errors.WithMessagef(err, "could not index option %s", opt.Option) + return nil, errors.WithMessagef(err, "failed to execute search to find source %s in index", name) } } - err = idx.Batch(batch) - if err != nil { - return nil, errors.WithMessage(err, "failed to run batch index operation") - } - return &Index[T]{ - idx, - }, nil + return result, nil } -func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) (*Result[T], error) { - query := bleve.NewMatchQuery(keyword) - query.Analyzer = "option_name" +func (index *ReadIndex) Search(ctx context.Context, source string, keyword string, from uint64) (*Result, error) { + sourceQuery := bleve.NewTermQuery(source) + userQuery := bleve.NewMatchQuery(keyword) + userQuery.Analyzer = "option_name" + + query := bleve.NewConjunctionQuery(sourceQuery, userQuery) + search := bleve.NewSearchRequest(query) search.Size = ResultsPerPage - search.Fields = []string{"data"} + search.Fields = []string{"_data"} search.Explain = true if from != 0 { @@ -164,10 +86,10 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) return nil, errors.WithMessage(err, "failed to execute search query") } - results := make([]DocumentMatch[T], min(ResultsPerPage, bleveResult.Total)) + results := make([]DocumentMatch, min(ResultsPerPage, bleveResult.Total)) var buf bytes.Buffer for i, result := range bleveResult.Hits { - _, err = buf.WriteString(result.Fields["data"].(string)) + _, err = buf.WriteString(result.Fields["_data"].(string)) if err != nil { return nil, errors.WithMessage(err, "error fetching result data") } @@ -178,7 +100,7 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) buf.Reset() } - return &Result[T]{ + return &Result{ SearchResult: bleveResult, Hits: results, }, nil |