about summary refs log tree commit diff stats
path: root/internal/search
diff options
context:
space:
mode:
author Alan Pearce 2024-05-13 21:50:14 +0200
committer Alan Pearce 2024-05-13 22:25:46 +0200
commit 6b40e0c0fa90f11be14a93f1d6275779fd645cac (patch)
tree 0df5bea87242450829ca441d235c077a0e09a149 /internal/search
parent 2722f24af87f437ed9fcb8cc743ad1784141fd3a (diff)
download searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.tar.lz
searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.tar.zst
searchix-6b40e0c0fa90f11be14a93f1d6275779fd645cac.zip
refactor: combine import and web server into one binary
Diffstat (limited to 'internal/search')
-rw-r--r-- internal/search/index_meta.go 73
-rw-r--r-- internal/search/indexer.go 292
-rw-r--r-- internal/search/search.go 123
3 files changed, 0 insertions, 488 deletions
diff --git a/internal/search/index_meta.go b/internal/search/index_meta.go
deleted file mode 100644
index bb7e69f..0000000
--- a/internal/search/index_meta.go
+++ /dev/null
@@ -1,73 +0,0 @@
-package search
-
-import (
-	"encoding/json"
-	"log/slog"
-	"os"
-	"searchix/internal/file"
-
-	"github.com/pkg/errors"
-)
-
-const CurrentSchemaVersion = 1 // version stamped into new index metadata; older stored versions trigger a re-index warning
-
-type IndexMeta struct { // metadata kept alongside a search index and persisted as JSON
-	path          string // file the metadata is written to by Save; unexported, so never part of the JSON
-	SchemaVersion int    // schema version recorded for the index
-}
-
-// createMeta builds fresh metadata, stamped with CurrentSchemaVersion, for an
-// index whose metadata file will live at path. It returns an error if
-// something already exists at path. Nothing is written here; call Save to
-// persist the result.
-func createMeta(path string) (*IndexMeta, error) {
-	found, err := file.Exists(path)
-	switch {
-	case err != nil:
-		return nil, errors.WithMessage(err, "could not check for existence of index metadata")
-	case found:
-		return nil, errors.New("index metadata already exists")
-	}
-
-	meta := new(IndexMeta)
-	meta.path = path
-	meta.SchemaVersion = CurrentSchemaVersion
-
-	return meta, nil
-}
-
-func openMeta(path string) (*IndexMeta, error) {
-	j, err := os.ReadFile(path)
-	if err != nil {
-		return nil, errors.WithMessage(err, "could not open index metadata file")
-	}
-	var meta IndexMeta
-	err = json.Unmarshal(j, &meta)
-	if err != nil {
-		return nil, errors.WithMessage(err, "index metadata is corrupt, try replacing the index")
-	}
-
-	meta.checkSchemaVersion()
-
-	return &meta, nil
-}
-
-// checkSchemaVersion logs a warning when the recorded schema version is older
-// than CurrentSchemaVersion. It only logs; it returns nothing and changes
-// nothing.
-func (i *IndexMeta) checkSchemaVersion() {
-	if i.SchemaVersion >= CurrentSchemaVersion {
-		return
-	}
-
-	slog.Warn(
-		"Index schema version out of date, suggest re-indexing",
-		"schema_version", i.SchemaVersion,
-		"latest_version", CurrentSchemaVersion,
-	)
-}
-
-// Save encodes the metadata as JSON and writes it to the metadata's path with
-// file mode 0600.
-func (i *IndexMeta) Save() error {
-	encoded, err := json.Marshal(i)
-	if err != nil {
-		return errors.WithMessage(err, "could not prepare index metadata for saving")
-	}
-
-	if err := os.WriteFile(i.path, encoded, 0o600); err != nil {
-		return errors.WithMessage(err, "could not save index metadata")
-	}
-
-	return nil
-}
diff --git a/internal/search/indexer.go b/internal/search/indexer.go
deleted file mode 100644
index a74189e..0000000
--- a/internal/search/indexer.go
+++ /dev/null
@@ -1,292 +0,0 @@
-package search
-
-import (
-	"bytes"
-	"context"
-	"encoding/gob"
-	"io/fs"
-	"log"
-	"log/slog"
-	"os"
-	"path"
-	"searchix/internal/file"
-	"searchix/internal/options"
-	"slices"
-
-	"github.com/blevesearch/bleve/v2"
-	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
-	"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
-	"github.com/blevesearch/bleve/v2/analysis/analyzer/web"
-	"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
-	"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
-	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
-	"github.com/blevesearch/bleve/v2/document"
-	"github.com/blevesearch/bleve/v2/mapping"
-	index "github.com/blevesearch/bleve_index_api"
-	"github.com/pkg/errors"
-)
-
-type WriteIndex struct { // handle used to import options into the search index
-	index bleve.Index // underlying bleve index
-	meta  *IndexMeta  // metadata created or loaded together with the index
-}
-
-// createIndexMapping assembles the bleve mapping used for the options index.
-//
-// Dynamic indexing and storing are switched off, so only the fields registered
-// here on the "option" document type are mapped. Three custom analysers
-// ("option_name", "loc" and "keyword_single") are registered on the mapping;
-// the first failure to register one aborts with an error.
-func createIndexMapping() (mapping.IndexMapping, error) {
-	im := bleve.NewIndexMapping()
-	im.TypeField = "BleveType"
-	im.IndexDynamic = false
-	im.StoreDynamic = false
-
-	// Analysers are registered in a fixed order before any field mapping
-	// refers to them by name.
-	analysers := []struct {
-		name   string
-		config map[string]interface{}
-	}{
-		{
-			name: "option_name",
-			config: map[string]interface{}{
-				"type":          custom.Name,
-				"tokenizer":     letter.Name,
-				"token_filters": []string{camelcase.Name},
-			},
-		},
-		{
-			name: "loc",
-			config: map[string]interface{}{
-				"type":          keyword.Name,
-				"tokenizer":     letter.Name,
-				"token_filters": []string{camelcase.Name},
-			},
-		},
-		{
-			name: "keyword_single",
-			config: map[string]interface{}{
-				"type":      keyword.Name,
-				"tokenizer": single.Name,
-			},
-		},
-	}
-	for _, a := range analysers {
-		if err := im.AddCustomAnalyzer(a.name, a.config); err != nil {
-			return nil, errors.WithMessage(err, "could not add custom analyser")
-		}
-	}
-
-	unstoredText := bleve.NewTextFieldMapping()
-	unstoredText.Store = false
-
-	// NOTE(review): this mapping is configured but never attached to a field
-	// below; Description uses the plain text mapping. Confirm which is intended.
-	descriptionFieldMapping := bleve.NewTextFieldMapping()
-	descriptionFieldMapping.Analyzer = web.Name
-	descriptionFieldMapping.Store = false
-
-	exactKeyword := bleve.NewKeywordFieldMapping()
-	exactKeyword.Analyzer = "keyword_single"
-
-	// NOTE(review): likewise configured but unused; Name is mapped with the
-	// keyword mapping. Confirm which is intended.
-	nameMapping := bleve.NewTextFieldMapping()
-	nameMapping.Store = false
-	nameMapping.IncludeTermVectors = true
-	nameMapping.Analyzer = "option_name"
-
-	locMapping := bleve.NewKeywordFieldMapping()
-	locMapping.Store = false
-	locMapping.IncludeTermVectors = true
-	locMapping.Analyzer = "loc"
-
-	// Sub-document shape shared by the Default and Example fields.
-	nixValue := bleve.NewDocumentStaticMapping()
-	for _, field := range []string{"Text", "Markdown"} {
-		nixValue.AddFieldMappingsAt(field, unstoredText)
-	}
-
-	option := bleve.NewDocumentStaticMapping()
-	option.AddFieldMappingsAt("Name", exactKeyword)
-	option.AddFieldMappingsAt("Source", exactKeyword)
-	option.AddFieldMappingsAt("Loc", locMapping)
-	option.AddFieldMappingsAt("RelatedPackages", unstoredText)
-	option.AddFieldMappingsAt("Description", unstoredText)
-	for _, sub := range []string{"Default", "Example"} {
-		option.AddSubDocumentMapping(sub, nixValue)
-	}
-
-	im.AddDocumentMapping("option", option)
-
-	return im, nil
-}
-
-// createIndex creates a new bleve index at indexPath using the mapping from
-// createIndexMapping, bleve's default index type and KV store, and a store
-// configuration of {"nosync": true}.
-func createIndex(indexPath string) (bleve.Index, error) {
-	im, err := createIndexMapping()
-	if err != nil {
-		return nil, err
-	}
-
-	storeConfig := map[string]interface{}{"nosync": true}
-	created, err := bleve.NewUsing(
-		indexPath,
-		im,
-		bleve.Config.DefaultIndexType,
-		bleve.Config.DefaultKVStore,
-		storeConfig,
-	)
-	if err != nil {
-		return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath)
-	}
-
-	return created, nil
-}
-
-const (
-	indexBaseName = "index.bleve" // name of the bleve index inside the data directory
-	metaBaseName  = "meta.json"   // name of the metadata file inside the data directory
-)
-
-var expectedDataFiles = []string{ // the only entries deleteIndex tolerates in the data directory
-	metaBaseName,
-	indexBaseName,
-	"sources",
-}
-
-// deleteIndex removes the whole data directory at dataRoot.
-//
-// As a safety net it refuses to delete anything when the directory contains
-// entries other than those listed in expectedDataFiles. A data directory that
-// does not exist is treated as already deleted, so a forced re-index also
-// works on a first run.
-func deleteIndex(dataRoot string) error {
-	entries, err := os.ReadDir(dataRoot)
-	if err != nil {
-		if os.IsNotExist(err) {
-			// Nothing to remove.
-			return nil
-		}
-
-		return errors.WithMessagef(err, "could not read data directory %s", dataRoot)
-	}
-
-	// Filter out everything we know about; whatever is left is foreign.
-	unknown := slices.DeleteFunc(entries, func(e fs.DirEntry) bool {
-		return slices.Contains(expectedDataFiles, e.Name())
-	})
-	if len(unknown) > 0 {
-		return errors.Errorf(
-			"cowardly refusing to remove data directory %s as it contains unknown files: %v",
-			dataRoot,
-			unknown,
-		)
-	}
-
-	if err := os.RemoveAll(dataRoot); err != nil {
-		return errors.WithMessagef(err, "could not remove data directory %s", dataRoot)
-	}
-
-	return nil
-}
-
-// NewIndexer opens the search index under dataRoot for writing, creating it
-// when it does not exist yet.
-//
-// With force set, the existing data directory is deleted first (see
-// deleteIndex) and a new index and metadata file are created. Without force,
-// an existing index is opened together with its metadata. If the metadata
-// step fails, the index that was just opened or created is closed again
-// before the error is returned, so no handle is leaked.
-func NewIndexer(dataRoot string, force bool) (*WriteIndex, error) {
-	bleve.SetLog(log.Default())
-
-	indexPath := path.Join(dataRoot, indexBaseName)
-	metaPath := path.Join(dataRoot, metaBaseName)
-
-	exists, err := file.Exists(indexPath)
-	if err != nil {
-		return nil, errors.WithMessagef(
-			err,
-			"could not check if index exists at path %s",
-			indexPath,
-		)
-	}
-
-	fresh := !exists || force
-
-	var idx bleve.Index
-	if fresh {
-		if force {
-			if err := deleteIndex(dataRoot); err != nil {
-				return nil, err
-			}
-		}
-		idx, err = createIndex(indexPath)
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		idx, err = bleve.Open(indexPath)
-		if err != nil {
-			return nil, errors.WithMessagef(err, "could not open index at path %s", indexPath)
-		}
-	}
-
-	meta, err := prepareIndexerMeta(metaPath, fresh)
-	if err != nil {
-		// Best effort: the metadata error is the one worth reporting.
-		_ = idx.Close()
-
-		return nil, err
-	}
-
-	return &WriteIndex{
-		index: idx,
-		meta:  meta,
-	}, nil
-}
-
-// prepareIndexerMeta creates and saves new metadata when fresh is true, and
-// loads the existing metadata file otherwise.
-func prepareIndexerMeta(metaPath string, fresh bool) (*IndexMeta, error) {
-	if !fresh {
-		return openMeta(metaPath)
-	}
-
-	meta, err := createMeta(metaPath)
-	if err != nil {
-		return nil, err
-	}
-	if err := meta.Save(); err != nil {
-		return nil, err
-	}
-
-	return meta, nil
-}
-
-// ImportOptions indexes every option received on objects and reports
-// per-option failures on the returned channel, which is closed when the
-// import has finished.
-//
-// Each option is mapped into a document with the ID "<Source>/<Name>", and a
-// gob encoding of the whole option is stored in the "_data" field. All
-// documents go into a single batch that is flushed once objects is closed or
-// ctx is cancelled; whatever was batched before cancellation is still
-// flushed. Cancellation is noticed even while waiting for the next option.
-//
-// The returned channel is unbuffered, so the caller must keep receiving from
-// it until it is closed.
-func (i *WriteIndex) ImportOptions(
-	ctx context.Context,
-	objects <-chan *options.NixOption,
-) <-chan error {
-	errs := make(chan error)
-
-	go func() {
-		defer close(errs)
-		batch := i.index.NewBatch()
-		indexMapping := i.index.Mapping()
-
-		for {
-			var (
-				opt *options.NixOption
-				ok  bool
-			)
-			// Wait for either the next option or cancellation, so a stalled
-			// producer cannot keep this goroutine alive after ctx is done.
-			select {
-			case <-ctx.Done():
-			case opt, ok = <-objects:
-			}
-			// Cancellation takes priority over an option that arrived at the
-			// same time.
-			if ctx.Err() != nil {
-				slog.Debug("context cancelled")
-
-				break
-			}
-			if !ok {
-				break
-			}
-
-			doc := document.NewDocument(opt.Source + "/" + opt.Name)
-			if err := indexMapping.MapDocument(doc, opt); err != nil {
-				errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Name)
-
-				continue
-			}
-
-			var data bytes.Buffer
-			if err := gob.NewEncoder(&data).Encode(opt); err != nil {
-				errs <- errors.WithMessage(err, "could not store option in search index")
-
-				continue
-			}
-			field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField)
-
-			if err := batch.IndexAdvanced(doc.AddField(field)); err != nil {
-				errs <- errors.WithMessagef(err, "could not index option %s", opt.Name)
-
-				continue
-			}
-		}
-
-		slog.Debug("flushing batch", "size", batch.Size())
-
-		if err := i.index.Batch(batch); err != nil {
-			errs <- errors.WithMessagef(err, "could not flush batch")
-		}
-	}()
-
-	return errs
-}
-
-// Close closes the underlying bleve index, wrapping any error it reports.
-func (i *WriteIndex) Close() error {
-	if closeErr := i.index.Close(); closeErr != nil {
-		return errors.WithMessagef(closeErr, "could not close index")
-	}
-
-	return nil
-}
diff --git a/internal/search/search.go b/internal/search/search.go
deleted file mode 100644
index c930f15..0000000
--- a/internal/search/search.go
+++ /dev/null
@@ -1,123 +0,0 @@
-package search
-
-import (
-	"bytes"
-	"context"
-	"encoding/gob"
-	"path"
-	"searchix/internal/options"
-
-	"github.com/blevesearch/bleve/v2"
-	"github.com/blevesearch/bleve/v2/search"
-	"github.com/pkg/errors"
-)
-
-const ResultsPerPage = 20 // page size requested from bleve by Search
-
-type DocumentMatch struct { // one search hit together with its decoded option
-	search.DocumentMatch                   // embedded bleve match
-	Data                 options.NixOption // option decoded from the hit's stored "_data" field
-}
-
-type Result struct { // value returned by ReadIndex.Search
-	*bleve.SearchResult                 // embedded raw bleve result
-	Hits                []DocumentMatch // decoded hits; shadows the Hits field of the embedded SearchResult
-}
-
-type ReadIndex struct { // handle used to query the search index
-	index bleve.Index // underlying bleve index
-	meta  *IndexMeta  // metadata loaded together with the index
-}
-
-// Open opens the existing search index and its metadata under dataRoot for
-// querying.
-//
-// If the metadata cannot be loaded, the index that was just opened is closed
-// again before the error is returned, so no handle is leaked.
-func Open(dataRoot string) (*ReadIndex, error) {
-	indexPath := path.Join(dataRoot, indexBaseName)
-	metaPath := path.Join(dataRoot, metaBaseName)
-
-	idx, err := bleve.Open(indexPath)
-	if err != nil {
-		return nil, errors.WithMessagef(err, "unable to open index at path %s", indexPath)
-	}
-
-	meta, err := openMeta(metaPath)
-	if err != nil {
-		// Best effort: the metadata error is the one worth reporting.
-		_ = idx.Close()
-
-		return nil, errors.WithMessagef(err, "unable to open metadata at path %s", metaPath)
-	}
-
-	return &ReadIndex{
-		index: idx,
-		meta:  meta,
-	}, nil
-}
-
-// GetSource runs a term query for name against the "Source" field and returns
-// the raw bleve result. If ctx is already done once the search returns,
-// ctx.Err() is returned in preference to any error from the search itself.
-func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) {
-	bySource := bleve.NewTermQuery(name)
-	bySource.SetField("Source")
-
-	result, err := index.index.SearchInContext(ctx, bleve.NewSearchRequest(bySource))
-
-	// Cancellation wins over whatever the search reported.
-	select {
-	case <-ctx.Done():
-		return nil, ctx.Err()
-	default:
-	}
-
-	if err != nil {
-		return nil, errors.WithMessagef(
-			err,
-			"failed to execute search to find source %s in index",
-			name,
-		)
-	}
-
-	return result, nil
-}
-
-// Search looks for options matching keyword within source and returns one
-// page of up to ResultsPerPage hits, starting at offset from.
-//
-// The query is the conjunction of a term query for source and a match query
-// for keyword analysed with the "option_name" analyser. Each hit's stored
-// "_data" field is gob-decoded into the Data field of the corresponding
-// DocumentMatch. The returned Hits slice has exactly one entry per hit bleve
-// returned, so short or empty pages carry no zero-valued padding.
-//
-// If ctx is already done once the search returns, ctx.Err() is returned in
-// preference to any error from the search itself. A hit without a string
-// "_data" field yields an error rather than a panic.
-func (index *ReadIndex) Search(
-	ctx context.Context,
-	source string,
-	keyword string,
-	from uint64,
-) (*Result, error) {
-	sourceQuery := bleve.NewTermQuery(source)
-	userQuery := bleve.NewMatchQuery(keyword)
-	userQuery.Analyzer = "option_name"
-
-	query := bleve.NewConjunctionQuery(sourceQuery, userQuery)
-
-	request := bleve.NewSearchRequest(query)
-	request.Size = ResultsPerPage
-	request.Fields = []string{"_data"}
-
-	if from != 0 {
-		request.From = int(from)
-	}
-
-	bleveResult, err := index.index.SearchInContext(ctx, request)
-
-	// Cancellation wins over whatever the search reported.
-	select {
-	case <-ctx.Done():
-		return nil, ctx.Err()
-	default:
-	}
-
-	if err != nil {
-		return nil, errors.WithMessage(err, "failed to execute search query")
-	}
-
-	// Size by the hits actually returned: the last page, or an offset past
-	// the end, holds fewer than min(ResultsPerPage, Total) entries.
-	results := make([]DocumentMatch, len(bleveResult.Hits))
-	var buf bytes.Buffer
-	for i, hit := range bleveResult.Hits {
-		raw, ok := hit.Fields["_data"].(string)
-		if !ok {
-			return nil, errors.Errorf(
-				"error fetching result data: hit %s has no stored _data field",
-				hit.ID,
-			)
-		}
-
-		buf.Reset()
-		buf.WriteString(raw) // bytes.Buffer writes always return a nil error
-
-		// Each hit is an independent gob stream, hence a decoder per hit.
-		if err := gob.NewDecoder(&buf).Decode(&results[i].Data); err != nil {
-			return nil, errors.WithMessagef(err, "error decoding gob data: %s", buf.String())
-		}
-	}
-
-	return &Result{
-		SearchResult: bleveResult,
-		Hits:         results,
-	}, nil
-}