about summary refs log tree commit diff stats
path: root/internal
diff options
context:
space:
mode:
authorAlan Pearce2024-05-17 15:51:27 +0200
committerAlan Pearce2024-05-17 15:51:27 +0200
commit9f241bff227608dd53a250d012116077dce6dab6 (patch)
treec8f1f19788f866548dd42baaf7fedfd0560a47b7 /internal
parent43fe431c3aff36d52a630ca670f9fc4ee161962f (diff)
downloadsearchix-9f241bff227608dd53a250d012116077dce6dab6.tar.lz
searchix-9f241bff227608dd53a250d012116077dce6dab6.tar.zst
searchix-9f241bff227608dd53a250d012116077dce6dab6.zip
feat: automatically add/remove sources to/from index
Diffstat (limited to 'internal')
-rw-r--r--internal/importer/main.go18
-rw-r--r--internal/index/indexer.go35
-rw-r--r--internal/index/search.go19
3 files changed, 70 insertions, 2 deletions
diff --git a/internal/importer/main.go b/internal/importer/main.go
index 7776482..747d813 100644
--- a/internal/importer/main.go
+++ b/internal/importer/main.go
@@ -9,12 +9,18 @@ import (
 	"searchix/internal/config"
 	"searchix/internal/fetcher"
 	"searchix/internal/index"
+	"slices"
 	"strings"
 
 	"github.com/pkg/errors"
 )
 
-func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
+func Start(
+	cfg *config.Config,
+	indexer *index.WriteIndex,
+	forceUpdate bool,
+	onlyUpdateSources *[]string,
+) error {
 	if len(cfg.Importer.Sources) == 0 {
 		slog.Info("No sources enabled")
 
@@ -24,7 +30,15 @@ func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
 	ctx, cancel := context.WithTimeout(context.Background(), cfg.Importer.Timeout.Duration)
 	defer cancel()
 
+	forceUpdate = forceUpdate || (onlyUpdateSources != nil && len(*onlyUpdateSources) > 0)
+
 	for name, source := range cfg.Importer.Sources {
+		if len(*onlyUpdateSources) > 0 {
+			if !slices.Contains(*onlyUpdateSources, name) {
+				continue
+			}
+		}
+
 		logger := slog.With("name", name, "fetcher", source.Fetcher.String())
 		logger.Debug("starting fetcher")
 
@@ -54,7 +68,7 @@ func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
 		}
 		logger.Info("importer fetch succeeded", "updated", updated)
 
-		if updated || replace {
+		if updated || forceUpdate {
 			err = setRepoRevision(files.Revision, source)
 			if err != nil {
 				logger.Warn("could not set source repo revision", "error", err)
diff --git a/internal/index/indexer.go b/internal/index/indexer.go
index a661b61..4a6a9d8 100644
--- a/internal/index/indexer.go
+++ b/internal/index/indexer.go
@@ -7,6 +7,7 @@ import (
 	"io/fs"
 	"log"
 	"log/slog"
+	"math"
 	"os"
 	"path"
 	"searchix/internal/file"
@@ -326,3 +327,37 @@ func (i *WriteIndex) Close() error {
 
 	return nil
 }
+
+func (i *WriteIndex) DeleteBySource(source string) error {
+	query := bleve.NewTermQuery(source)
+	search := bleve.NewSearchRequest(query)
+	search.Size = math.MaxInt
+	search.Fields = []string{"_id"}
+
+	results, err := i.index.Search(search)
+	if err != nil {
+		return errors.WithMessagef(err, "failed to query documents of retired index %s", source)
+	}
+
+	batch := i.index.NewBatch()
+	var k uint
+	for _, hit := range results.Hits {
+		batch.Delete(hit.ID)
+		if k++; k%batchSize == 0 {
+			err := i.Flush(batch)
+			if err != nil {
+				return err
+			}
+		}
+	}
+	err = i.Flush(batch)
+	if err != nil {
+		return err
+	}
+
+	if uint64(search.Size) < results.Total {
+		return i.DeleteBySource(source) // unlikely :^)
+	}
+
+	return nil
+}
diff --git a/internal/index/search.go b/internal/index/search.go
index 5c18edb..0b20063 100644
--- a/internal/index/search.go
+++ b/internal/index/search.go
@@ -29,6 +29,25 @@ type ReadIndex struct {
 	meta  *Meta
 }
 
+func (index *ReadIndex) GetEnabledSources() ([]string, error) {
+	facet := bleve.NewFacetRequest("Source", 100)
+	query := bleve.NewMatchAllQuery()
+	search := bleve.NewSearchRequest(query)
+	search.AddFacet("Source", facet)
+
+	results, err := index.index.Search(search)
+	if err != nil {
+		return nil, errors.WithMessage(err, "could not get list of enabled sources from index")
+	}
+
+	enabledSources := make([]string, results.Facets["Source"].Terms.Len())
+	for i, term := range results.Facets["Source"].Terms.Terms() {
+		enabledSources[i] = term.Term
+	}
+
+	return enabledSources, nil
+}
+
 func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) {
 	query := bleve.NewTermQuery(name)
 	query.SetField("Source")