feat: promote prefix/exact name/attr matches Fixes: https://todo.sr.ht/~alanpearce/searchix/18
Alan Pearce alan@alanpearce.eu
Sat, 22 Mar 2025 16:51:43 +0100
4 files changed, 183 insertions(+), 11 deletions(-)
M .golangci.yaml → .golangci.yaml
@@ -21,6 +21,8 @@ - sloglint - unconvert - wrapcheck linters-settings: + paralleltest: + ignore-missing: true gosec: excludes: - G115
M internal/index/indexer.go → internal/index/indexer.go
@@ -8,6 +8,7 @@ "io/fs" "math" "os" "path" + "path/filepath" "slices" "go.alanpearce.eu/searchix/internal/file" @@ -89,13 +90,6 @@ "token_filters": []string{ camelcase.Name, porter.Name, }, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") - } - err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]any{ - "type": keyword.Name, - "tokenizer": letter.Name, }) if err != nil { return nil, errors.WithMessage(err, "could not add custom analyser") @@ -222,6 +216,13 @@ options *Options, ) (*ReadIndex, *WriteIndex, bool, errors.E) { var err errors.E bleve.SetLog(zap.NewStdLog(options.Logger.Named("bleve").GetLogger())) + if !filepath.IsAbs(dataRoot) { + wd, err := os.Getwd() + if err != nil { + return nil, nil, false, errors.WithMessagef(err, "could not get working directory") + } + dataRoot = filepath.Join(wd, dataRoot) + } indexPath := path.Join(dataRoot, indexBaseName) metaPath := path.Join(dataRoot, metaBaseName)
M internal/index/search.go → internal/index/search.go
@@ -128,8 +128,8 @@ query := bleve.NewBooleanQuery() // match the user's query in any field ... query.AddMust(bleve.NewDisjunctionQuery( - bleve.NewTermQuery(keyword), - bleve.NewPrefixQuery(keyword), + setBoost(bleve.NewTermQuery(keyword), 50), + setBoost(bleve.NewPrefixQuery(keyword), 25), bleve.NewMatchPhraseQuery(keyword), bleve.NewMatchQuery(keyword), )) @@ -140,12 +140,22 @@ setField(bleve.NewTermQuery(source.Key), "Source"), ) } else { q := bleve.NewDisjunctionQuery( - setBoost(setField(bleve.NewTermQuery("nixpkgs"), "Source"), -150), - setBoost(setField(bleve.NewTermQuery("nur"), "Source"), -200), + setBoost(setField(bleve.NewTermQuery("nixpkgs"), "Source"), -1000), + setBoost(setField(bleve.NewTermQuery("nur"), "Source"), -5000), ) query.AddShould(q) } + + mainProgramQuery := bleve.NewMatchQuery(keyword) + mainProgramQuery.SetField("MainProgram") + mainProgramQuery.SetBoost(50) + query.AddShould(mainProgramQuery) + + mainProgramLiteralQuery := bleve.NewTermQuery(keyword) + mainProgramLiteralQuery.SetField("MainProgram") + mainProgramLiteralQuery.SetBoost(100) + query.AddShould(mainProgramLiteralQuery) programsQuery := bleve.NewMatchQuery(keyword) programsQuery.SetField("Programs") @@ -215,3 +225,12 @@ } return nil, err } + +// Close closes the wrapped index; a failure is returned with a stack trace attached via errors.WithStack. +func (index *ReadIndex) Close() error { + err := index.index.Close() + if err != nil { + return errors.WithStack(err) + } + + return nil +}
A internal/index/search_test.go
@@ -0,0 +1,150 @@+package index_test + +import ( + "context" + "maps" + "math" + "slices" + "testing" + "time" + + "go.alanpearce.eu/searchix/internal/config" + "go.alanpearce.eu/searchix/internal/index" + "go.alanpearce.eu/searchix/internal/nix" + "go.alanpearce.eu/x/log" +) + +const dataRoot = "../../data" + +func TestSearchGitPackagesFirst(t *testing.T) { + log := log.Configure(false) + cfg := config.DefaultConfig + + read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{ + Logger: log.Named("index"), + LowMemory: false, + }) + defer read.Close() + if err != nil { + t.Fatal(err) + } + if !exists { + t.Fatal("expected index to exist") + } + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + source := cfg.Importer.Sources["nixpkgs"] + if source == nil || !source.Enable { + t.Fatal("expected source to exist and be enabled") + } + + result, err := read.Search( + ctx, + source, + "git", + 0, + 100, + ) + if err != nil { + t.Fatal(err) + } + + if result.Total < 4 { + t.Errorf("Expected at least 4 results, got %d", result.Total) + } + important := map[string]int{ + "git": 0, + "git-doc": 0, + "gitFull": 0, + "gitMinimal": 0, + "gitSVN": 0, + } + var i int + for hit := range result.Hits { + data := hit.Data.(nix.Package) + if _, found := important[data.Attribute]; found { + important[data.Attribute] = i + } + i++ + } + if slices.Max(slices.Collect(maps.Values(important))) > len(important) { + t.Errorf( + "Expected all of %s to be the first %d matches, got %v", + slices.Collect(maps.Keys(important)), + len(important), + important, + ) + } +} + +func TestSearchJujutsuPackagesFirst(t *testing.T) { + log := log.Configure(false) + cfg := config.DefaultConfig + + read, _, exists, err := index.OpenOrCreate(dataRoot, false, &index.Options{ + Logger: log.Named("index"), + LowMemory: false, + }) + defer read.Close() + if err != nil { + t.Fatal(err) + } + if !exists { + t.Fatal("expected index to exist") + } + + ctx, 
cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + source := cfg.Importer.Sources["nixpkgs"] + if source == nil || !source.Enable { + t.Fatal("expected source to exist and be enabled") + } + + result, err := read.Search( + ctx, + source, + "jj", + 0, + 100, + ) + if err != nil { + t.Fatal(err) + } + + if result.Total < 4 { + t.Errorf("Expected at least 4 results, got %d", result.Total) + } + important := map[string]int{ + "jj": 0, + "jujutsu": 0, + "lazyjj": 0, + "jjui": 0, + "jj-fzf": 0, + } + matches := []string{} + unwanted := "javacc" + unwantedIndex := math.MaxInt + var i int + for hit := range result.Hits { + data := hit.Data.(nix.Package) + if _, found := important[data.Attribute]; found { + matches = append(matches, data.Attribute) + } else if data.Attribute == unwanted { + unwantedIndex = i + matches = append(matches, data.Attribute) + } + i++ + } + if slices.Max(slices.Collect(maps.Values(important))) > unwantedIndex { + t.Errorf( + "Expected all of %s to be above unwanted result %s at index %d. Results: %v", + slices.Collect(maps.Keys(important)), + unwanted, + unwantedIndex, + matches, + ) + } +}