about summary refs log tree commit diff stats
path: root/internal/search
diff options
context:
space:
mode:
author: Alan Pearce, 2024-05-09 16:47:41 +0200
committer: Alan Pearce, 2024-05-09 19:27:55 +0200
commit: e062ca72b222b890e345548bd8422d5df98e9fef (patch)
tree: 89f52ebfdb1fb8069e6323d9dde42f5491dad5d1 /internal/search
parent: 967f6fdf5c1693d3aa27079b3ae28768fb7356c6 (diff)
download: searchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.lz
searchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.zst
searchix-e062ca72b222b890e345548bd8422d5df98e9fef.zip
feat: import sources from configuration in go code and index options
Diffstat (limited to 'internal/search')
-rw-r--r-- internal/search/indexer.go 183
-rw-r--r-- internal/search/search.go 150
2 files changed, 219 insertions, 114 deletions
diff --git a/internal/search/indexer.go b/internal/search/indexer.go
new file mode 100644
index 0000000..b0e57d4
--- /dev/null
+++ b/internal/search/indexer.go
@@ -0,0 +1,183 @@
+package search
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"log"
+	"log/slog"
+	"path"
+	"searchix/internal/options"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
+	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
+	"github.com/blevesearch/bleve/v2/document"
+	"github.com/blevesearch/bleve/v2/mapping"
+	index "github.com/blevesearch/bleve_index_api"
+	"github.com/pkg/errors"
+)
+
+type WriteIndex struct {
+	index        bleve.Index
+	indexMapping *mapping.IndexMappingImpl
+}
+
+func NewIndexer(dir string) (*WriteIndex, error) {
+	var err error
+	bleve.SetLog(log.Default())
+
+	indexPath := path.Join(dir, indexFilename)
+
+	indexMapping := bleve.NewIndexMapping()
+	indexMapping.StoreDynamic = false
+	indexMapping.IndexDynamic = false
+	indexMapping.TypeField = "BleveType"
+
+	textFieldMapping := bleve.NewTextFieldMapping()
+	textFieldMapping.Store = false
+
+	descriptionFieldMapping := bleve.NewTextFieldMapping()
+	descriptionFieldMapping.Store = false
+	descriptionFieldMapping.Analyzer = web.Name
+
+	err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{
+		"type":      custom.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": single.Name,
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+
+	keywordFieldMapping := bleve.NewKeywordFieldMapping()
+	keywordFieldMapping.Analyzer = "keyword_single"
+
+	nameMapping := bleve.NewTextFieldMapping()
+	nameMapping.Analyzer = "option_name"
+	nameMapping.IncludeTermVectors = true
+	nameMapping.Store = false
+
+	nixValueMapping := bleve.NewDocumentStaticMapping()
+	nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping)
+	nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping)
+
+	locFieldMapping := bleve.NewKeywordFieldMapping()
+	locFieldMapping.Analyzer = "loc"
+	locFieldMapping.IncludeTermVectors = true
+	locFieldMapping.Store = false
+
+	optionMapping := bleve.NewDocumentStaticMapping()
+
+	optionMapping.AddFieldMappingsAt("Option", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Loc", locFieldMapping)
+	optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping)
+	optionMapping.AddFieldMappingsAt("Description", textFieldMapping)
+
+	optionMapping.AddSubDocumentMapping("Default", nixValueMapping)
+	optionMapping.AddSubDocumentMapping("Example", nixValueMapping)
+
+	indexMapping.AddDocumentMapping("option", optionMapping)
+
+	idx, err := bleve.New(indexPath, indexMapping)
+	if err != nil {
+		return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath)
+	}
+
+	return &WriteIndex{
+		idx,
+		indexMapping,
+	}, nil
+}
+
+func (i *WriteIndex) ImportOptions(ctx context.Context, objects <-chan *options.NixOption) <-chan error {
+	var err error
+	errs := make(chan error)
+
+	go func() {
+		defer close(errs)
+		batch := i.index.NewBatch()
+
+	outer:
+		for opt := range objects {
+			select {
+			case <-ctx.Done():
+				slog.Debug("context cancelled")
+
+				break outer
+			default:
+			}
+
+			doc := document.NewDocument(opt.Source + "/" + opt.Option)
+			err = i.indexMapping.MapDocument(doc, opt)
+			if err != nil {
+				errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Option)
+
+				continue
+			}
+
+			var data bytes.Buffer
+			enc := gob.NewEncoder(&data)
+			err = enc.Encode(opt)
+			if err != nil {
+				errs <- errors.WithMessage(err, "could not store option in search index")
+
+				continue
+			}
+			field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField)
+			newDoc := doc.AddField(field)
+
+			// slog.Debug("adding option to index", "name", opt.Option)
+			err = batch.IndexAdvanced(newDoc)
+
+			if err != nil {
+				errs <- errors.WithMessagef(err, "could not index option %s", opt.Option)
+
+				continue
+			}
+		}
+
+		size := batch.Size()
+		slog.Debug("flushing batch", "size", size)
+
+		err := i.index.Batch(batch)
+		if err != nil {
+			errs <- errors.WithMessagef(err, "could not flush batch")
+		}
+	}()
+
+	return errs
+}
+
+func (i *WriteIndex) Close() error {
+	err := i.index.Close()
+	if err != nil {
+		return errors.WithMessagef(err, "could not close index")
+	}
+
+	return nil
+}
diff --git a/internal/search/search.go b/internal/search/search.go
index 97d8404..92afdfb 100644
--- a/internal/search/search.go
+++ b/internal/search/search.go
@@ -4,151 +4,73 @@ import (
 	"bytes"
 	"context"
 	"encoding/gob"
-	"log"
-	"os"
 	"path"
 	"searchix/internal/options"
 
-	"github.com/bcicen/jstream"
 	"github.com/blevesearch/bleve/v2"
-	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
-	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
-	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
-	"github.com/blevesearch/bleve/v2/document"
 	"github.com/blevesearch/bleve/v2/search"
-	index "github.com/blevesearch/bleve_index_api"
-	"github.com/mitchellh/mapstructure"
 	"github.com/pkg/errors"
 )
 
 const ResultsPerPage = 20
+const indexFilename = "index.bleve"
 
-type DocumentMatch[T options.NixOption] struct {
+type DocumentMatch struct {
 	search.DocumentMatch
-	Data T
+	Data options.NixOption
 }
 
-type Result[T options.NixOption] struct {
+type Result struct {
 	*bleve.SearchResult
-	Hits []DocumentMatch[T]
+	Hits []DocumentMatch
 }
 
-type Index[T options.NixOption] struct {
+type ReadIndex struct {
 	index bleve.Index
 }
 
-func New[T options.NixOption](kind string) (*Index[T], error) {
-	var err error
-	bleve.SetLog(log.Default())
+func Open(dir string) (*ReadIndex, error) {
+	indexPath := path.Join(dir, indexFilename)
 
-	indexMapping := bleve.NewIndexMapping()
-
-	textFieldMapping := bleve.NewTextFieldMapping()
-	textFieldMapping.Store = false
-
-	descriptionFieldMapping := bleve.NewTextFieldMapping()
-	descriptionFieldMapping.Store = false
-	descriptionFieldMapping.Analyzer = "web"
-
-	err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{
-		"type":      custom.Name,
-		"tokenizer": letter.Name,
-		"token_filters": []string{
-			camelcase.Name,
-		},
-	})
+	idx, err := bleve.Open(indexPath)
 	if err != nil {
-		return nil, errors.WithMessage(err, "could not add custom analyser")
+		return nil, errors.WithMessagef(err, "unable to open index at path %s", indexPath)
 	}
-	nameMapping := bleve.NewTextFieldMapping()
-	nameMapping.Analyzer = "option_name"
-	nameMapping.IncludeTermVectors = true
-	nameMapping.Store = false
-
-	nixValueMapping := bleve.NewDocumentStaticMapping()
-	nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping)
-	nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping)
-
-	optionMapping := bleve.NewDocumentStaticMapping()
-
-	optionMapping.AddFieldMappingsAt("Option", nameMapping)
-	optionMapping.AddFieldMappingsAt("Loc", bleve.NewKeywordFieldMapping())
-	optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping)
-	optionMapping.AddFieldMappingsAt("Description", textFieldMapping)
-
-	optionMapping.AddSubDocumentMapping("Default", nixValueMapping)
-	optionMapping.AddSubDocumentMapping("Example", nixValueMapping)
-
-	indexMapping.AddDocumentMapping("option", optionMapping)
-
-	idx, err := bleve.NewMemOnly(indexMapping)
-	// index, err = bleve.New(path.Join(cfg.DataPath, const indexFilename = "index.bleve"), indexMapping)
-
-	if err != nil {
-		return nil, errors.WithMessage(err, "error opening index")
-	}
-	batch := idx.NewBatch()
-
-	jsonFile, err := os.Open(path.Join("data", "processed", kind+".json"))
-	if err != nil {
-		return nil, errors.WithMessage(err, "error opening json file")
-	}
-
-	dec := jstream.NewDecoder(jsonFile, 1)
-	var opt options.NixOption
-	ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{
-		ErrorUnused: true,
-		ZeroFields:  true,
-		Result:      &opt,
-	})
-	if err != nil {
-		return nil, errors.WithMessage(err, "could not create struct decoder")
-	}
-	for mv := range dec.Stream() {
-		opt = options.NixOption{}
-		orig := mv.Value.(map[string]interface{})
-		err := ms.Decode(orig) // stores in opt
-		if err != nil {
-			return nil, errors.WithMessagef(err, "could not decode value: %+v", orig)
-		}
 
-		doc := document.NewDocument(opt.Option)
-		err = indexMapping.MapDocument(doc, opt)
-		if err != nil {
-			return nil, errors.WithMessagef(err, "could not map document for option: %s", opt.Option)
-		}
+	return &ReadIndex{
+		idx,
+	}, nil
+}
 
-		var data bytes.Buffer
-		enc := gob.NewEncoder(&data)
-		err = enc.Encode(opt)
-		if err != nil {
-			return nil, errors.WithMessage(err, "could not store option in search index")
-		}
-		field := document.NewTextFieldWithIndexingOptions("data", nil, data.Bytes(), index.StoreField)
-		newDoc := doc.AddField(field)
+func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) {
+	query := bleve.NewTermQuery(name)
+	query.SetField("Source")
+	search := bleve.NewSearchRequest(query)
 
-		err = batch.IndexAdvanced(newDoc)
+	result, err := index.index.SearchInContext(ctx, search)
 
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
 		if err != nil {
-			return nil, errors.WithMessagef(err, "could not index option %s", opt.Option)
+			return nil, errors.WithMessagef(err, "failed to execute search to find source %s in index", name)
 		}
 	}
-	err = idx.Batch(batch)
-	if err != nil {
-		return nil, errors.WithMessage(err, "failed to run batch index operation")
-	}
 
-	return &Index[T]{
-		idx,
-	}, nil
+	return result, nil
 }
 
-func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) (*Result[T], error) {
-	query := bleve.NewMatchQuery(keyword)
-	query.Analyzer = "option_name"
+func (index *ReadIndex) Search(ctx context.Context, source string, keyword string, from uint64) (*Result, error) {
+	sourceQuery := bleve.NewTermQuery(source)
+	userQuery := bleve.NewMatchQuery(keyword)
+	userQuery.Analyzer = "option_name"
+
+	query := bleve.NewConjunctionQuery(sourceQuery, userQuery)
+
 	search := bleve.NewSearchRequest(query)
 	search.Size = ResultsPerPage
-	search.Fields = []string{"data"}
+	search.Fields = []string{"_data"}
 	search.Explain = true
 
 	if from != 0 {
@@ -164,10 +86,10 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64)
 			return nil, errors.WithMessage(err, "failed to execute search query")
 		}
 
-		results := make([]DocumentMatch[T], min(ResultsPerPage, bleveResult.Total))
+		results := make([]DocumentMatch, min(ResultsPerPage, bleveResult.Total))
 		var buf bytes.Buffer
 		for i, result := range bleveResult.Hits {
-			_, err = buf.WriteString(result.Fields["data"].(string))
+			_, err = buf.WriteString(result.Fields["_data"].(string))
 			if err != nil {
 				return nil, errors.WithMessage(err, "error fetching result data")
 			}
@@ -178,7 +100,7 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64)
 			buf.Reset()
 		}
 
-		return &Result[T]{
+		return &Result{
 			SearchResult: bleveResult,
 			Hits:         results,
 		}, nil