about summary refs log tree commit diff stats
path: root/internal/search/indexer.go
diff options
context:
space:
mode:
authorAlan Pearce2024-05-09 16:47:41 +0200
committerAlan Pearce2024-05-09 19:27:55 +0200
commite062ca72b222b890e345548bd8422d5df98e9fef (patch)
tree89f52ebfdb1fb8069e6323d9dde42f5491dad5d1 /internal/search/indexer.go
parent967f6fdf5c1693d3aa27079b3ae28768fb7356c6 (diff)
downloadsearchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.lz
searchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.zst
searchix-e062ca72b222b890e345548bd8422d5df98e9fef.zip
feat: import sources from configuration in go code and index options
Diffstat (limited to 'internal/search/indexer.go')
-rw-r--r--internal/search/indexer.go183
1 files changed, 183 insertions, 0 deletions
diff --git a/internal/search/indexer.go b/internal/search/indexer.go
new file mode 100644
index 0000000..b0e57d4
--- /dev/null
+++ b/internal/search/indexer.go
@@ -0,0 +1,183 @@
+package search
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"log"
+	"log/slog"
+	"path"
+	"searchix/internal/options"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
+	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
+	"github.com/blevesearch/bleve/v2/document"
+	"github.com/blevesearch/bleve/v2/mapping"
+	index "github.com/blevesearch/bleve_index_api"
+	"github.com/pkg/errors"
+)
+
+type WriteIndex struct {
+	index        bleve.Index
+	indexMapping *mapping.IndexMappingImpl
+}
+
+func NewIndexer(dir string) (*WriteIndex, error) {
+	var err error
+	bleve.SetLog(log.Default())
+
+	indexPath := path.Join(dir, indexFilename)
+
+	indexMapping := bleve.NewIndexMapping()
+	indexMapping.StoreDynamic = false
+	indexMapping.IndexDynamic = false
+	indexMapping.TypeField = "BleveType"
+
+	textFieldMapping := bleve.NewTextFieldMapping()
+	textFieldMapping.Store = false
+
+	descriptionFieldMapping := bleve.NewTextFieldMapping()
+	descriptionFieldMapping.Store = false
+	descriptionFieldMapping.Analyzer = web.Name
+
+	err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{
+		"type":      custom.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": letter.Name,
+		"token_filters": []string{
+			camelcase.Name,
+		},
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+	err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{
+		"type":      keyword.Name,
+		"tokenizer": single.Name,
+	})
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not add custom analyser")
+	}
+
+	keywordFieldMapping := bleve.NewKeywordFieldMapping()
+	keywordFieldMapping.Analyzer = "keyword_single"
+
+	nameMapping := bleve.NewTextFieldMapping()
+	nameMapping.Analyzer = "option_name"
+	nameMapping.IncludeTermVectors = true
+	nameMapping.Store = false
+
+	nixValueMapping := bleve.NewDocumentStaticMapping()
+	nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping)
+	nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping)
+
+	locFieldMapping := bleve.NewKeywordFieldMapping()
+	locFieldMapping.Analyzer = "loc"
+	locFieldMapping.IncludeTermVectors = true
+	locFieldMapping.Store = false
+
+	optionMapping := bleve.NewDocumentStaticMapping()
+
+	optionMapping.AddFieldMappingsAt("Option", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping)
+	optionMapping.AddFieldMappingsAt("Loc", locFieldMapping)
+	optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping)
+	optionMapping.AddFieldMappingsAt("Description", textFieldMapping)
+
+	optionMapping.AddSubDocumentMapping("Default", nixValueMapping)
+	optionMapping.AddSubDocumentMapping("Example", nixValueMapping)
+
+	indexMapping.AddDocumentMapping("option", optionMapping)
+
+	idx, err := bleve.New(indexPath, indexMapping)
+	if err != nil {
+		return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath)
+	}
+
+	return &WriteIndex{
+		idx,
+		indexMapping,
+	}, nil
+}
+
+func (i *WriteIndex) ImportOptions(ctx context.Context, objects <-chan *options.NixOption) <-chan error {
+	var err error
+	errs := make(chan error)
+
+	go func() {
+		defer close(errs)
+		batch := i.index.NewBatch()
+
+	outer:
+		for opt := range objects {
+			select {
+			case <-ctx.Done():
+				slog.Debug("context cancelled")
+
+				break outer
+			default:
+			}
+
+			doc := document.NewDocument(opt.Source + "/" + opt.Option)
+			err = i.indexMapping.MapDocument(doc, opt)
+			if err != nil {
+				errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Option)
+
+				continue
+			}
+
+			var data bytes.Buffer
+			enc := gob.NewEncoder(&data)
+			err = enc.Encode(opt)
+			if err != nil {
+				errs <- errors.WithMessage(err, "could not store option in search index")
+
+				continue
+			}
+			field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField)
+			newDoc := doc.AddField(field)
+
+			// slog.Debug("adding option to index", "name", opt.Option)
+			err = batch.IndexAdvanced(newDoc)
+
+			if err != nil {
+				errs <- errors.WithMessagef(err, "could not index option %s", opt.Option)
+
+				continue
+			}
+		}
+
+		size := batch.Size()
+		slog.Debug("flushing batch", "size", size)
+
+		err := i.index.Batch(batch)
+		if err != nil {
+			errs <- errors.WithMessagef(err, "could not flush batch")
+		}
+	}()
+
+	return errs
+}
+
+func (i *WriteIndex) Close() error {
+	err := i.index.Close()
+	if err != nil {
+		return errors.WithMessagef(err, "could not close index")
+	}
+
+	return nil
+}