From 0dbfe37fbddb95c184d845c79bbe014597d55fe8 Mon Sep 17 00:00:00 2001
From: Alan Pearce
Date: Thu, 23 May 2024 13:14:45 +0200
Subject: feat: stream files directly from fetcher to importer

Use IndexMeta to store the information relevant to making conditional updates in future runs.
---
 internal/fetcher/download.go | 56 ++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 33 deletions(-)

(limited to 'internal/fetcher/download.go')

diff --git a/internal/fetcher/download.go b/internal/fetcher/download.go
index 2c7b8fd..59ef8d1 100644
--- a/internal/fetcher/download.go
+++ b/internal/fetcher/download.go
@@ -5,15 +5,13 @@ import (
 	"fmt"
 	"log/slog"
 	"net/url"
-	"path"
 	"searchix/internal/config"
-	"searchix/internal/file"
+	"searchix/internal/index"
 
 	"github.com/pkg/errors"
 )
 
 type DownloadFetcher struct {
-	DataPath   string
 	Source     *config.Source
 	SourceFile string
 	Logger     *slog.Logger
@@ -21,15 +19,13 @@ type DownloadFetcher struct {
 
 func NewDownloadFetcher(
 	source *config.Source,
-	dataPath string,
 	logger *slog.Logger,
 ) (*DownloadFetcher, error) {
 	switch source.Importer {
 	case config.Options:
 		return &DownloadFetcher{
-			DataPath: dataPath,
-			Source:   source,
-			Logger:   logger,
+			Source: source,
+			Logger: logger,
 		}, nil
 	default:
 		return nil, fmt.Errorf("unsupported importer type %s", source.Importer)
@@ -42,22 +38,11 @@ var files = map[string]string{
 }
 
 func (i *DownloadFetcher) FetchIfNeeded(
-	parent context.Context,
-) (f FetchedFiles, updated bool, err error) {
-	ctx, cancel := context.WithTimeout(parent, i.Source.FetchTimeout.Duration)
-	defer cancel()
-
-	root := i.DataPath
-
-	err = file.Mkdirp(root)
-	if err != nil {
-		err = errors.WithMessagef(err, "error creating directory for data: %s", root)
-
-		return
-	}
-
+	ctx context.Context,
+	sourceMeta *index.SourceMeta,
+) (f FetchedFiles, err error) {
 	var fetchURL string
-	for _, filename := range files {
+	for key, filename := range files {
 		fetchURL, err = url.JoinPath(i.Source.URL, filename)
 		if err != nil {
 			err = errors.WithMessagef(
@@ -70,23 +55,28 @@ func (i *DownloadFetcher) FetchIfNeeded(
 			return
 		}
 
-		outPath := path.Join(root, filename)
-
-		i.Logger.Debug("preparing to fetch URL", "url", fetchURL, "path", outPath)
+		i.Logger.Debug("preparing to fetch URL", "url", fetchURL)
 
-		updated, err = fetchFileIfNeeded(ctx, outPath, fetchURL)
+		body, mtime, err := fetchFileIfNeeded(ctx, sourceMeta.Updated, fetchURL)
 		if err != nil {
-			return
+			i.Logger.Warn("failed to fetch file", "url", fetchURL, "error", err)
+
+			return f, err
 		}
 		// don't bother to issue requests for the later files
-		if !updated {
+		if mtime.Before(sourceMeta.Updated) {
 			break
 		}
-	}
-
-	f = FetchedFiles{
-		Revision: path.Join(root, "revision"),
-		Options:  path.Join(root, "options.json"),
+		sourceMeta.Updated = mtime
+
+		switch key {
+		case "revision":
+			f.Revision = body
+		case "options":
+			f.Options = body
+		default:
+			return f, errors.Errorf("unknown file kind %s", key)
+		}
 	}
 
 	return
-- 
cgit 1.4.1