about summary refs log tree commit diff stats
path: root/internal/index
diff options
context:
space:
mode:
author Alan Pearce 2024-05-13 21:50:14 +0200
committer Alan Pearce 2024-05-13 22:25:46 +0200
commit 6b40e0c0fa90f11be14a93f1d6275779fd645cac (patch)
tree 0df5bea87242450829ca441d235c077a0e09a149 /internal/index
parent 2722f24af87f437ed9fcb8cc743ad1784141fd3a (diff)
download searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.tar.lz
searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.tar.zst
searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.zip
refactor: combine import and web server into one binary
Diffstat (limited to 'internal/index')
-rw-r--r-- internal/index/index_meta.go 73
-rw-r--r-- internal/index/indexer.go 298
-rw-r--r-- internal/index/search.go 102
3 files changed, 473 insertions, 0 deletions
diff --git a/internal/index/index_meta.go b/internal/index/index_meta.go
new file mode 100644
index 0000000..e24cd3b
--- /dev/null
+++ b/internal/index/index_meta.go
@@ -0,0 +1,73 @@
+package index
+
+import (
+	"encoding/json"
+	"log/slog"
+	"os"
+	"searchix/internal/file"
+
+	"github.com/pkg/errors"
+)
+
+// CurrentSchemaVersion is the schema version written into newly created index
+// metadata. checkSchemaVersion logs a warning when loaded metadata carries a
+// lower value.
+const CurrentSchemaVersion = 1
+
+// Meta is the index metadata that Save serialises to JSON and openMeta reads
+// back. path is the file Save writes to; it is unexported and therefore not
+// part of the JSON encoding. SchemaVersion is the schema version the metadata
+// was created with.
+type Meta struct {
+	path          string
+	SchemaVersion int
+}
+
+// createMeta returns fresh, unsaved metadata bound to path and stamped with
+// CurrentSchemaVersion. It fails if the existence check errors or if
+// something already exists at path. Nothing is written to disk here.
+func createMeta(path string) (*Meta, error) {
+	found, err := file.Exists(path)
+	switch {
+	case err != nil:
+		return nil, errors.WithMessage(err, "could not check for existence of index metadata")
+	case found:
+		return nil, errors.New("index metadata already exists")
+	}
+
+	fresh := &Meta{
+		path:          path,
+		SchemaVersion: CurrentSchemaVersion,
+	}
+
+	return fresh, nil
+}
+
+func openMeta(path string) (*Meta, error) {
+	j, err := os.ReadFile(path)
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not open index metadata file")
+	}
+	var meta Meta
+	err = json.Unmarshal(j, &meta)
+	if err != nil {
+		return nil, errors.WithMessage(err, "index metadata is corrupt, try replacing the index")
+	}
+
+	meta.checkSchemaVersion()
+
+	return &meta, nil
+}
+
+// checkSchemaVersion logs a warning when the metadata's schema version is
+// lower than CurrentSchemaVersion. It never fails and changes nothing.
+func (i *Meta) checkSchemaVersion() {
+	if i.SchemaVersion >= CurrentSchemaVersion {
+		return
+	}
+
+	slog.Warn(
+		"Index schema version out of date, suggest re-indexing",
+		"schema_version", i.SchemaVersion,
+		"latest_version", CurrentSchemaVersion,
+	)
+}
+
+// Save encodes the metadata as JSON and writes it to the metadata's path with
+// permissions 0600. It returns a wrapped error if either encoding or writing
+// fails.
+func (i *Meta) Save() error {
+	encoded, err := json.Marshal(i)
+	if err != nil {
+		return errors.WithMessage(err, "could not prepare index metadata for saving")
+	}
+
+	if err := os.WriteFile(i.path, encoded, 0o600); err != nil {
+		return errors.WithMessage(err, "could not save index metadata")
+	}
+
+	return nil
+}
diff --git a/internal/index/indexer.go b/internal/index/indexer.go
new file mode 100644
index 0000000..63cf1a6
--- /dev/null
+++ b/internal/index/indexer.go
@@ -0,0 +1,298 @@
+package index
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"io/fs"
+	"log"
+	"log/slog"
+	"os"
+	"path"
+	"searchix/internal/file"
+	"searchix/internal/options"
+	"slices"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
+	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
+	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
+	"github.com/blevesearch/bleve/v2/document"
+	"github.com/blevesearch/bleve/v2/mapping"
+	indexAPI "github.com/blevesearch/bleve_index_api"
+	"github.com/pkg/errors"
+)
+
+// WriteIndex is the write-side handle on the search index: it wraps the bleve
+// index together with its metadata and is used to import options
+// (ImportOptions) and to close the index (Close).
+type WriteIndex struct {
+	index bleve.Index
+	meta  *Meta
+}
+
+// createIndexMapping builds the bleve index mapping used when a new index is
+// created. Dynamic indexing and storing are disabled, so only the fields
+// mapped explicitly on the "option" document type are indexed. Three custom
+// analysers ("option_name", "loc" and "keyword_single") are registered; if
+// any registration fails the wrapped error is returned and no mapping is
+// produced.
+func createIndexMapping() (mapping.IndexMapping, error) {
+	im := bleve.NewIndexMapping()
+	im.StoreDynamic = false
+	im.IndexDynamic = false
+	im.TypeField = "BleveType"
+
+	// Analysers are registered in this fixed order; the first failure aborts.
+	customAnalyzers := []struct {
+		name   string
+		config map[string]interface{}
+	}{
+		{
+			name: "option_name",
+			config: map[string]interface{}{
+				"type":      custom.Name,
+				"tokenizer": letter.Name,
+				"token_filters": []string{
+					camelcase.Name,
+				},
+			},
+		},
+		{
+			name: "loc",
+			config: map[string]interface{}{
+				"type":      keyword.Name,
+				"tokenizer": letter.Name,
+				"token_filters": []string{
+					camelcase.Name,
+				},
+			},
+		},
+		{
+			name: "keyword_single",
+			config: map[string]interface{}{
+				"type":      keyword.Name,
+				"tokenizer": single.Name,
+			},
+		},
+	}
+	for _, a := range customAnalyzers {
+		if err := im.AddCustomAnalyzer(a.name, a.config); err != nil {
+			return nil, errors.WithMessage(err, "could not add custom analyser")
+		}
+	}
+
+	// Unstored free-text mapping shared by several fields below.
+	plainText := bleve.NewTextFieldMapping()
+	plainText.Store = false
+
+	// NOTE(review): this web-analysed mapping is configured but never attached
+	// to a field; "Description" uses the plain text mapping. Confirm intent.
+	descText := bleve.NewTextFieldMapping()
+	descText.Store = false
+	descText.Analyzer = web.Name
+
+	keywordField := bleve.NewKeywordFieldMapping()
+	keywordField.Analyzer = "keyword_single"
+
+	// NOTE(review): likewise configured but never attached; "Name" uses the
+	// keyword mapping. Confirm intent.
+	nameField := bleve.NewTextFieldMapping()
+	nameField.Analyzer = "option_name"
+	nameField.IncludeTermVectors = true
+	nameField.Store = false
+
+	locField := bleve.NewKeywordFieldMapping()
+	locField.Analyzer = "loc"
+	locField.IncludeTermVectors = true
+	locField.Store = false
+
+	// Sub-document shape shared by the "Default" and "Example" values.
+	nixValue := bleve.NewDocumentStaticMapping()
+	nixValue.AddFieldMappingsAt("Text", plainText)
+	nixValue.AddFieldMappingsAt("Markdown", plainText)
+
+	option := bleve.NewDocumentStaticMapping()
+	option.AddFieldMappingsAt("Name", keywordField)
+	option.AddFieldMappingsAt("Source", keywordField)
+	option.AddFieldMappingsAt("Loc", locField)
+	option.AddFieldMappingsAt("RelatedPackages", plainText)
+	option.AddFieldMappingsAt("Description", plainText)
+	option.AddSubDocumentMapping("Default", nixValue)
+	option.AddSubDocumentMapping("Example", nixValue)
+
+	im.AddDocumentMapping("option", option)
+
+	return im, nil
+}
+
+// createIndex creates a new bleve index at indexPath using the mapping from
+// createIndexMapping, bleve's default index type and KV store, and the
+// "nosync" option enabled. Errors from building the mapping are returned
+// unchanged; errors from bleve are wrapped with the target path.
+func createIndex(indexPath string) (bleve.Index, error) {
+	m, err := createIndexMapping()
+	if err != nil {
+		return nil, err
+	}
+
+	kvConfig := map[string]interface{}{
+		"nosync": true,
+	}
+	created, err := bleve.NewUsing(
+		indexPath,
+		m,
+		bleve.Config.DefaultIndexType,
+		bleve.Config.DefaultKVStore,
+		kvConfig,
+	)
+	if err != nil {
+		return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath)
+	}
+
+	return created, nil
+}
+
+// Base names of the entries OpenOrCreate places under the data root.
+const (
+	indexBaseName = "index.bleve"
+	metaBaseName  = "meta.json"
+)
+
+// expectedDataFiles lists the entry names deleteIndex accepts inside the data
+// directory; any other entry makes it refuse to delete the directory.
+// NOTE(review): "sources" is presumably created by another package — confirm.
+var expectedDataFiles = []string{
+	metaBaseName,
+	indexBaseName,
+	"sources",
+}
+
+// deleteIndex removes the whole data directory dataRoot, but only if every
+// entry in it is listed in expectedDataFiles; otherwise it refuses and
+// returns an error naming the unexpected entries.
+//
+// A data directory that does not exist is treated as already deleted, so a
+// forced re-create on a fresh installation does not fail.
+func deleteIndex(dataRoot string) error {
+	dir, err := os.ReadDir(dataRoot)
+	if err != nil {
+		if os.IsNotExist(err) {
+			// Nothing to remove.
+			return nil
+		}
+
+		return errors.WithMessagef(err, "could not read data directory %s", dataRoot)
+	}
+	// Drop the entries we know about; whatever is left is foreign.
+	remainingFiles := slices.DeleteFunc(dir, func(e fs.DirEntry) bool {
+		return slices.Contains(expectedDataFiles, e.Name())
+	})
+	if len(remainingFiles) > 0 {
+		return errors.Errorf(
+			"cowardly refusing to remove data directory %s as it contains unknown files: %v",
+			dataRoot,
+			remainingFiles,
+		)
+	}
+
+	err = os.RemoveAll(dataRoot)
+	if err != nil {
+		return errors.WithMessagef(err, "could not remove data directory %s", dataRoot)
+	}
+
+	return nil
+}
+
+// OpenOrCreate opens the search index under dataRoot, creating it (and its
+// metadata file) when it does not exist yet. With force set, the data
+// directory is deleted first and the index is always created from scratch.
+//
+// It returns a read handle and a write handle sharing the same underlying
+// index and metadata, plus a boolean reporting whether an index already
+// existed at the index path before this call (true even when force replaced
+// it). On error both handles are nil, and an index that had already been
+// opened or created is closed again rather than leaked.
+func OpenOrCreate(dataRoot string, force bool) (*ReadIndex, *WriteIndex, bool, error) {
+	bleve.SetLog(log.Default())
+
+	indexPath := path.Join(dataRoot, indexBaseName)
+	metaPath := path.Join(dataRoot, metaBaseName)
+
+	exists, err := file.Exists(indexPath)
+	if err != nil {
+		return nil, nil, exists, errors.WithMessagef(
+			err,
+			"could not check if index exists at path %s",
+			indexPath,
+		)
+	}
+
+	var idx bleve.Index
+	var meta *Meta
+	if !exists || force {
+		if force {
+			err = deleteIndex(dataRoot)
+			if err != nil {
+				return nil, nil, exists, err
+			}
+		}
+		idx, err = createIndex(indexPath)
+		if err != nil {
+			return nil, nil, exists, err
+		}
+
+		meta, err = createMeta(metaPath)
+		if err == nil {
+			err = meta.Save()
+		}
+	} else {
+		idx, err = bleve.Open(indexPath)
+		if err != nil {
+			return nil, nil, exists, errors.WithMessagef(err, "could not open index at path %s", indexPath)
+		}
+
+		meta, err = openMeta(metaPath)
+	}
+	if err != nil {
+		// The index is open at this point; release it before bailing out. A
+		// failure to close is deliberately dropped in favour of the original
+		// error, which is the one the caller needs to see.
+		_ = idx.Close()
+
+		return nil, nil, exists, err
+	}
+
+	return &ReadIndex{index: idx, meta: meta},
+		&WriteIndex{index: idx, meta: meta},
+		exists,
+		nil
+}
+
+// ImportOptions indexes every option received on objects and returns a
+// channel of the errors encountered along the way.
+//
+// The work runs in a background goroutine. Each option is mapped to a bleve
+// document, its gob encoding is attached as the stored "_data" field, and the
+// document is added to a single batch. Options that fail to map, encode or
+// index are reported on the returned channel and skipped. The goroutine stops
+// when objects is closed or ctx is cancelled (also while waiting for the next
+// option), then flushes whatever is in the batch and closes the error
+// channel.
+//
+// The error channel is unbuffered, so the caller must drain it until it is
+// closed.
+func (i *WriteIndex) ImportOptions(
+	ctx context.Context,
+	objects <-chan *options.NixOption,
+) <-chan error {
+	errs := make(chan error)
+
+	go func() {
+		defer close(errs)
+		batch := i.index.NewBatch()
+		indexMapping := i.index.Mapping()
+
+	outer:
+		for {
+			var opt *options.NixOption
+			// Wait for either the next option or cancellation, so a stalled
+			// producer cannot keep this goroutine alive after ctx is done.
+			select {
+			case <-ctx.Done():
+				slog.Debug("context cancelled")
+
+				break outer
+			case o, ok := <-objects:
+				if !ok {
+					break outer
+				}
+				opt = o
+			}
+
+			doc := document.NewDocument(opt.Source + "/" + opt.Name)
+			if err := indexMapping.MapDocument(doc, opt); err != nil {
+				errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Name)
+
+				continue
+			}
+
+			// Keep the full option, gob-encoded, as a stored field so search
+			// results can be rebuilt without consulting the original data.
+			var data bytes.Buffer
+			if err := gob.NewEncoder(&data).Encode(opt); err != nil {
+				errs <- errors.WithMessage(err, "could not store option in search index")
+
+				continue
+			}
+			field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), indexAPI.StoreField)
+			newDoc := doc.AddField(field)
+
+			if err := batch.IndexAdvanced(newDoc); err != nil {
+				errs <- errors.WithMessagef(err, "could not index option %s", opt.Name)
+
+				continue
+			}
+		}
+
+		size := batch.Size()
+		slog.Debug("flushing batch", "size", size)
+
+		if err := i.index.Batch(batch); err != nil {
+			errs <- errors.WithMessagef(err, "could not flush batch")
+		}
+	}()
+
+	return errs
+}
+
+// Close closes the underlying bleve index, wrapping any error it reports.
+func (i *WriteIndex) Close() error {
+	if err := i.index.Close(); err != nil {
+		return errors.WithMessagef(err, "could not close index")
+	}
+
+	return nil
+}
diff --git a/internal/index/search.go b/internal/index/search.go
new file mode 100644
index 0000000..d069510
--- /dev/null
+++ b/internal/index/search.go
@@ -0,0 +1,102 @@
+package index
+
+import (
+	"bytes"
+	"context"
+	"encoding/gob"
+	"searchix/internal/options"
+
+	"github.com/blevesearch/bleve/v2"
+	"github.com/blevesearch/bleve/v2/search"
+	"github.com/pkg/errors"
+)
+
+// ResultsPerPage is the number of hits Search asks bleve for per request.
+const ResultsPerPage = 20
+
+// DocumentMatch is a single search hit. It embeds bleve's
+// search.DocumentMatch, and Data holds the option that Search decodes from
+// the hit's stored "_data" field.
+type DocumentMatch struct {
+	search.DocumentMatch
+	Data options.NixOption
+}
+
+// Result is what Search returns: the raw bleve search result plus the decoded
+// hits for the requested page.
+// NOTE(review): the Hits field declared here presumably shadows the Hits
+// promoted from the embedded SearchResult — confirm callers expect that.
+type Result struct {
+	*bleve.SearchResult
+	Hits []DocumentMatch
+}
+
+// ReadIndex is the read-side handle on the search index: it wraps the bleve
+// index together with its metadata and provides the query methods GetSource
+// and Search.
+type ReadIndex struct {
+	index bleve.Index
+	meta  *Meta
+}
+
+// GetSource runs a term query for name against the "Source" field and returns
+// bleve's raw search result. If ctx is done by the time the search returns,
+// ctx.Err() is reported in preference to any search error; otherwise a search
+// failure is returned wrapped with the source name.
+func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) {
+	sourceQuery := bleve.NewTermQuery(name)
+	sourceQuery.SetField("Source")
+	request := bleve.NewSearchRequest(sourceQuery)
+
+	result, err := index.index.SearchInContext(ctx, request)
+
+	// Cancellation takes precedence over whatever the search itself reported.
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+
+	if err != nil {
+		return nil, errors.WithMessagef(
+			err,
+			"failed to execute search to find source %s in index",
+			name,
+		)
+	}
+
+	return result, nil
+}
+
+// Search looks up options belonging to source that match keyword and returns
+// one page of at most ResultsPerPage hits, starting at offset from.
+//
+// The query is the conjunction of a term query on the "Source" field (the
+// same field GetSource queries) and a match query for keyword analysed with
+// the "option_name" analyser. Each hit's stored "_data" field is gob-decoded
+// into the Data of the corresponding DocumentMatch, whose embedded bleve
+// match is filled in as well.
+//
+// If ctx is done by the time the search returns, ctx.Err() is reported in
+// preference to any search error. An error is also returned when a hit
+// carries no "_data" string or when its data cannot be decoded.
+func (index *ReadIndex) Search(
+	ctx context.Context,
+	source string,
+	keyword string,
+	from uint64,
+) (*Result, error) {
+	sourceQuery := bleve.NewTermQuery(source)
+	sourceQuery.SetField("Source")
+	userQuery := bleve.NewMatchQuery(keyword)
+	userQuery.Analyzer = "option_name"
+
+	query := bleve.NewConjunctionQuery(sourceQuery, userQuery)
+
+	request := bleve.NewSearchRequest(query)
+	request.Size = ResultsPerPage
+	request.Fields = []string{"_data"}
+
+	if from != 0 {
+		request.From = int(from)
+	}
+
+	bleveResult, err := index.index.SearchInContext(ctx, request)
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	default:
+	}
+	if err != nil {
+		return nil, errors.WithMessage(err, "failed to execute search query")
+	}
+
+	// Size by the hits actually returned: Total counts matches across all
+	// pages, so a short last page would otherwise be padded with empty entries.
+	results := make([]DocumentMatch, len(bleveResult.Hits))
+	var buf bytes.Buffer
+	for i, hit := range bleveResult.Hits {
+		data, ok := hit.Fields["_data"].(string)
+		if !ok {
+			return nil, errors.Errorf("search hit %s has no stored option data", hit.ID)
+		}
+
+		buf.Reset()
+		_, err = buf.WriteString(data)
+		if err != nil {
+			return nil, errors.WithMessage(err, "error fetching result data")
+		}
+		err = gob.NewDecoder(&buf).Decode(&results[i].Data)
+		if err != nil {
+			return nil, errors.WithMessagef(err, "error decoding gob data: %s", buf.String())
+		}
+		results[i].DocumentMatch = *hit
+	}
+
+	return &Result{
+		SearchResult: bleveResult,
+		Hits:         results,
+	}, nil
+}