about summary refs log tree commit diff stats
path: root/internal/fetcher/download.go
diff options
context:
space:
mode:
author Alan Pearce 2024-05-23 13:14:45 +0200
committer Alan Pearce 2024-05-23 13:14:45 +0200
commit 0dbfe37fbddb95c184d845c79bbe014597d55fe8 (patch)
tree e68a2db861211ceebe4c357a059a4cb511f707a9 /internal/fetcher/download.go
parent 3053e41b1528ef898cccd44e056e4d167619af6b (diff)
download searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.tar.lz
searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.tar.zst
searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.zip
feat: stream files directly from fetcher to importer
Use IndexMeta to store the information relevant to making conditional
updates in future runs.
Diffstat (limited to 'internal/fetcher/download.go')
-rw-r--r-- internal/fetcher/download.go 56
1 files changed, 23 insertions, 33 deletions
diff --git a/internal/fetcher/download.go b/internal/fetcher/download.go
index 2c7b8fd..59ef8d1 100644
--- a/internal/fetcher/download.go
+++ b/internal/fetcher/download.go
@@ -5,15 +5,13 @@ import (
 	"fmt"
 	"log/slog"
 	"net/url"
-	"path"
 	"searchix/internal/config"
-	"searchix/internal/file"
+	"searchix/internal/index"
 
 	"github.com/pkg/errors"
 )
 
 type DownloadFetcher struct {
-	DataPath   string
 	Source     *config.Source
 	SourceFile string
 	Logger     *slog.Logger
@@ -21,15 +19,13 @@ type DownloadFetcher struct {
 
 func NewDownloadFetcher(
 	source *config.Source,
-	dataPath string,
 	logger *slog.Logger,
 ) (*DownloadFetcher, error) {
 	switch source.Importer {
 	case config.Options:
 		return &DownloadFetcher{
-			DataPath: dataPath,
-			Source:   source,
-			Logger:   logger,
+			Source: source,
+			Logger: logger,
 		}, nil
 	default:
 		return nil, fmt.Errorf("unsupported importer type %s", source.Importer)
@@ -42,22 +38,11 @@ var files = map[string]string{
 }
 
 func (i *DownloadFetcher) FetchIfNeeded(
-	parent context.Context,
-) (f FetchedFiles, updated bool, err error) {
-	ctx, cancel := context.WithTimeout(parent, i.Source.FetchTimeout.Duration)
-	defer cancel()
-
-	root := i.DataPath
-
-	err = file.Mkdirp(root)
-	if err != nil {
-		err = errors.WithMessagef(err, "error creating directory for data: %s", root)
-
-		return
-	}
-
+	ctx context.Context,
+	sourceMeta *index.SourceMeta,
+) (f FetchedFiles, err error) {
 	var fetchURL string
-	for _, filename := range files {
+	for key, filename := range files {
 		fetchURL, err = url.JoinPath(i.Source.URL, filename)
 		if err != nil {
 			err = errors.WithMessagef(
@@ -70,23 +55,28 @@ func (i *DownloadFetcher) FetchIfNeeded(
 			return
 		}
 
-		outPath := path.Join(root, filename)
-
-		i.Logger.Debug("preparing to fetch URL", "url", fetchURL, "path", outPath)
+		i.Logger.Debug("preparing to fetch URL", "url", fetchURL)
 
-		updated, err = fetchFileIfNeeded(ctx, outPath, fetchURL)
+		body, mtime, err := fetchFileIfNeeded(ctx, sourceMeta.Updated, fetchURL)
 		if err != nil {
-			return
+			i.Logger.Warn("failed to fetch file", "url", fetchURL, "error", err)
+
+			return f, err
 		}
 		// don't bother to issue requests for the later files
-		if !updated {
+		if mtime.Before(sourceMeta.Updated) {
 			break
 		}
-	}
-
-	f = FetchedFiles{
-		Revision: path.Join(root, "revision"),
-		Options:  path.Join(root, "options.json"),
+		sourceMeta.Updated = mtime
+
+		switch key {
+		case "revision":
+			f.Revision = body
+		case "options":
+			f.Options = body
+		default:
+			return f, errors.Errorf("unknown file kind %s", key)
+		}
 	}
 
 	return