all repos — searchix @ b650b993e38f9919d2e65b732a5dee0267c71b84

Search engine for NixOS, nix-darwin, home-manager and NUR users

feat: split compound words in names into n-grams

Implements: https://todo.sr.ht/~alanpearce/searchix/9

Alan Pearce
commit

b650b993e38f9919d2e65b732a5dee0267c71b84

parent

1e3868ef2bdedb72a9ec535489da9aeb8ae93c16

1 file changed, 27 insertions(+), 3 deletions(-)

jump to
M internal/index/indexer.go
@@ -16,13 +16,15 @@ "go.alanpearce.eu/x/log"
 	"go.uber.org/zap"
 	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
-	"github.com/blevesearch/bleve/v2/analysis/lang/en"
 	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/token/ngram"
 	"github.com/blevesearch/bleve/v2/analysis/token/porter"
 	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
 	"github.com/blevesearch/bleve/v2/document"
 	"github.com/blevesearch/bleve/v2/mapping"
 	indexAPI "github.com/blevesearch/bleve_index_api"
@@ -58,6 +60,28 @@ descriptionFieldMapping.Store = false
 	descriptionFieldMapping.Analyzer = web.Name
 	var err error
+
+	err = indexMapping.AddCustomTokenFilter("ngram", map[string]any{
+		"type": ngram.Name,
+		"min": 3.0,
+		"max": 25.0,
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "failed to add ngram token filter")
+	}
+
+	err = indexMapping.AddCustomAnalyzer("c_name", map[string]any{
+		"type": custom.Name,
+		"tokenizer": unicode.Name,
+		"token_filters": []string{
+			camelcase.Name,
+			"ngram",
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+
 	err = indexMapping.AddCustomAnalyzer("loc", map[string]any{
 		"type": keyword.Name,
 		"tokenizer": letter.Name,
@@ -83,7 +107,7 @@ keywordFieldMapping := bleve.NewKeywordFieldMapping()
 	keywordFieldMapping.Analyzer = simple.Name
 	nameMapping := bleve.NewTextFieldMapping()
-	nameMapping.Analyzer = en.AnalyzerName
+	nameMapping.Analyzer = "c_name"
 	nameMapping.IncludeTermVectors = true
 	nameMapping.Store = false
@@ -110,7 +134,7 @@
 	packageMapping := bleve.NewDocumentStaticMapping()
 	packageMapping.AddFieldMappingsAt("Name", nameMapping)
-	packageMapping.AddFieldMappingsAt("Attribute", keywordFieldMapping)
+	packageMapping.AddFieldMappingsAt("Attribute", nameMapping)
 	packageMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
 	packageMapping.AddFieldMappingsAt("Description", descriptionFieldMapping)
 	packageMapping.AddFieldMappingsAt("MainProgram", keywordFieldMapping)