about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2025-01-02 15:14:34 +0100
committer Alan Pearce 2025-01-02 15:14:34 +0100
commit 797c6e27e18f6c47ceb4266d747a05064365c32d (patch)
tree 2c67bb5e15f5bc1c3679ff8107b72b6e71132a53
parent 58643f38bdd62dc538a03865b1f81eb77d957779 (diff)
download searchix-797c6e27e18f6c47ceb4266d747a05064365c32d.tar.lz
searchix-797c6e27e18f6c47ceb4266d747a05064365c32d.tar.zst
searchix-797c6e27e18f6c47ceb4266d747a05064365c32d.zip
fix: boost non-stemmed matches over stemmed matches
https://todo.sr.ht/~alanpearce/searchix/7
-rw-r--r-- internal/index/indexer.go 19
-rw-r--r-- internal/index/search.go 31
2 files changed, 26 insertions, 24 deletions
diff --git a/internal/index/indexer.go b/internal/index/indexer.go
index 476ca29..0c12104 100644
--- a/internal/index/indexer.go
+++ b/internal/index/indexer.go
@@ -18,11 +18,11 @@ import (
 	"github.com/blevesearch/bleve/v2"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
 	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
 	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
 	"github.com/blevesearch/bleve/v2/analysis/token/porter"
 	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
-	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
 	"github.com/blevesearch/bleve/v2/document"
 	"github.com/blevesearch/bleve/v2/mapping"
 	indexAPI "github.com/blevesearch/bleve_index_api"
@@ -81,20 +81,19 @@ func createIndexMapping() (mapping.IndexMapping, error) {
 	}
 	err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{
 		"type":      keyword.Name,
-		"tokenizer": single.Name,
-		"token_filters": []string{
-			porter.Name,
-		},
+		"tokenizer": letter.Name,
 	})
 	if err != nil {
 		return nil, errors.WithMessage(err, "could not add custom analyser")
 	}
 
+	identityFieldMapping := bleve.NewKeywordFieldMapping()
+
 	keywordFieldMapping := bleve.NewKeywordFieldMapping()
-	keywordFieldMapping.Analyzer = "keyword_single"
+	keywordFieldMapping.Analyzer = simple.Name
 
 	nameMapping := bleve.NewTextFieldMapping()
-	nameMapping.Analyzer = "option_name"
+	nameMapping.Analyzer = simple.Name
 	nameMapping.IncludeTermVectors = true
 	nameMapping.Store = false
 
@@ -110,10 +109,10 @@ func createIndexMapping() (mapping.IndexMapping, error) {
 	optionMapping := bleve.NewDocumentStaticMapping()
 
 	optionMapping.AddFieldMappingsAt("Name", nameMapping)
-	optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Source", identityFieldMapping)
 	optionMapping.AddFieldMappingsAt("Loc", locFieldMapping)
 	optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping)
-	optionMapping.AddFieldMappingsAt("Description", textFieldMapping)
+	optionMapping.AddFieldMappingsAt("Description", descriptionFieldMapping)
 
 	optionMapping.AddSubDocumentMapping("Default", nixValueMapping)
 	optionMapping.AddSubDocumentMapping("Example", nixValueMapping)
@@ -123,7 +122,7 @@ func createIndexMapping() (mapping.IndexMapping, error) {
 	packageMapping.AddFieldMappingsAt("Name", nameMapping)
 	packageMapping.AddFieldMappingsAt("Attribute", keywordFieldMapping)
 	packageMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
-	packageMapping.AddFieldMappingsAt("Description", textFieldMapping)
+	packageMapping.AddFieldMappingsAt("Description", descriptionFieldMapping)
 	packageMapping.AddFieldMappingsAt("MainProgram", keywordFieldMapping)
 	packageMapping.AddFieldMappingsAt("PackageSet", keywordFieldMapping)
 	packageMapping.AddFieldMappingsAt("Platforms", keywordFieldMapping)
diff --git a/internal/index/search.go b/internal/index/search.go
index 4fe1736..d576d5a 100644
--- a/internal/index/search.go
+++ b/internal/index/search.go
@@ -10,7 +10,6 @@ import (
 	"go.alanpearce.eu/x/log"
 
 	"github.com/blevesearch/bleve/v2"
-	"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
 	"github.com/blevesearch/bleve/v2/search"
 	"github.com/blevesearch/bleve/v2/search/query"
 	"github.com/pkg/errors"
@@ -142,7 +141,15 @@ func (index *ReadIndex) Search(
 	// match the user's query in any field ...
 	userQuery := bleve.NewMatchQuery(keyword)
 	userQuery.Analyzer = "option_name"
-	query.AddMust(userQuery)
+
+	userQueryLoc := bleve.NewMatchQuery(keyword)
+	userQueryLoc.Analyzer = "loc"
+	userQueryLoc.SetBoost(2)
+
+	query.AddMust(bleve.NewDisjunctionQuery(
+		userQuery,
+		userQueryLoc,
+	))
 
 	if source != nil {
 		query.AddMust(
@@ -151,27 +158,23 @@ func (index *ReadIndex) Search(
 	} else {
 		q := bleve.NewTermQuery("nixpkgs")
 		q.SetField("Source")
-		q.SetBoost(-100)
+		q.SetBoost(-150)
 
 		query.AddShould(q)
 	}
 
-	// ...and boost it if it matches any of these
-	query.AddShould(
-		setField(bleve.NewMatchQuery(keyword), "MainProgram"),
-		setField(bleve.NewMatchQuery(keyword), "Name"),
-		setField(bleve.NewMatchQuery(keyword), "Attribute"),
-	)
-	query.AddShould(
-		setField(bleve.NewMatchQuery(keyword), "Loc"),
-		setField(bleve.NewMatchQuery(keyword), "Name"),
-	)
+	attrQuery := bleve.NewMatchQuery(keyword)
+	attrQuery.SetField("Attribute")
+	attrQuery.Analyzer = "keyword_single"
+	query.AddShould(attrQuery)
+
 	nameLiteralQuery := bleve.NewMatchQuery(keyword)
 	nameLiteralQuery.SetField("Name")
-	nameLiteralQuery.Analyzer = standard.Name
+	nameLiteralQuery.Analyzer = "keyword_single"
 	query.AddShould(nameLiteralQuery)
 
 	search := bleve.NewSearchRequest(query)
+	search.Explain = config.DevMode
 	search.Size = ResultsPerPage
 
 	if from != 0 {