about summary refs log tree commit diff stats
path: root/internal/fetcher/http.go
diff options
context:
space:
mode:
author Alan Pearce 2024-05-23 13:14:45 +0200
committer Alan Pearce 2024-05-23 13:14:45 +0200
commit 0dbfe37fbddb95c184d845c79bbe014597d55fe8 (patch)
tree e68a2db861211ceebe4c357a059a4cb511f707a9 /internal/fetcher/http.go
parent 3053e41b1528ef898cccd44e056e4d167619af6b (diff)
download searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.tar.lz
searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.tar.zst
searchix-0dbfe37fbddb95c184d845c79bbe014597d55fe8.zip
feat: stream files directly from fetcher to importer
Use IndexMeta to store the information relevant to making conditional
updates in future runs.
Diffstat (limited to 'internal/fetcher/http.go')
-rw-r--r-- internal/fetcher/http.go 74
1 files changed, 48 insertions, 26 deletions
diff --git a/internal/fetcher/http.go b/internal/fetcher/http.go
index 9afbbc0..675c3b3 100644
--- a/internal/fetcher/http.go
+++ b/internal/fetcher/http.go
@@ -3,68 +3,90 @@ package fetcher
 import (
 	"context"
 	"fmt"
+	"io"
 	"log/slog"
 	"net/http"
-	"os"
 	"searchix/internal/config"
-	"searchix/internal/file"
 	"strings"
 	"time"
 
+	"github.com/andybalholm/brotli"
 	"github.com/pkg/errors"
 )
 
-func fetchFileIfNeeded(ctx context.Context, path string, url string) (needed bool, err error) {
-	stat, err := file.StatIfExists(path)
-	if err != nil {
-		return false, errors.WithMessagef(err, "could not stat file %s", path)
+type brotliReadCloser struct {
+	src io.ReadCloser
+	*brotli.Reader
+}
+
+func newBrotliReader(src io.ReadCloser) *brotliReadCloser {
+	return &brotliReadCloser{
+		src:    src,
+		Reader: brotli.NewReader(src),
 	}
+}
 
-	var mtime string
-	if stat != nil {
-		mtime = strings.Replace(stat.ModTime().UTC().Format(time.RFC1123), "UTC", "GMT", 1)
+func (r *brotliReadCloser) Close() error {
+	return errors.Wrap(r.src.Close(), "failed to call close on underlying reader")
+}
+
+func fetchFileIfNeeded(
+	ctx context.Context,
+	mtime time.Time,
+	url string,
+) (body io.ReadCloser, newMtime time.Time, err error) {
+	var ifModifiedSince string
+	if !mtime.IsZero() {
+		ifModifiedSince = strings.Replace(mtime.UTC().Format(time.RFC1123), "UTC", "GMT", 1)
 	}
 
 	req, err := http.NewRequestWithContext(ctx, "GET", url, http.NoBody)
 	if err != nil {
-		return false, errors.WithMessagef(err, "could not create HTTP request for %s", url)
+		err = errors.WithMessagef(err, "could not create HTTP request for %s", url)
+
+		return
 	}
 
 	req.Header.Set("User-Agent", fmt.Sprintf("Searchix %s", config.ShortSHA))
 
-	if mtime != "" {
-		req.Header.Set("If-Modified-Since", mtime)
+	if ifModifiedSince != "" {
+		req.Header.Set("If-Modified-Since", ifModifiedSince)
 	}
 	res, err := http.DefaultClient.Do(req)
 	if err != nil {
-		return false, errors.WithMessagef(err, "could not make HTTP request to %s", url)
+		err = errors.WithMessagef(err, "could not make HTTP request to %s", url)
+
+		return
 	}
-	defer res.Body.Close()
 
 	switch res.StatusCode {
 	case http.StatusNotModified:
-		needed = false
+		newMtime = mtime
+
+		return
 	case http.StatusOK:
-		newMtime, err := time.Parse(time.RFC1123, res.Header.Get("Last-Modified"))
+		newMtime, err = time.Parse(time.RFC1123, res.Header.Get("Last-Modified"))
 		if err != nil {
 			slog.Warn(
 				"could not parse Last-Modified header from response",
 				"value",
 				res.Header.Get("Last-Modified"),
 			)
+			newMtime = time.Now()
 		}
-		err = file.WriteToFile(path, res.Body)
-		if err != nil {
-			return false, errors.WithMessagef(err, "could not write response body to file %s", path)
-		}
-		err = os.Chtimes(path, time.Time{}, newMtime)
-		if err != nil {
-			slog.Warn("could not update mtime on file", "file", path)
+
+		switch ce := res.Header.Get("Content-Encoding"); ce {
+		case "br":
+			slog.Debug("using brotli encoding")
+			body = newBrotliReader(res.Body)
+		case "", "identity", "gzip":
+			body = res.Body
+		default:
+			err = fmt.Errorf("cannot handle a body with content-encoding %s", ce)
 		}
-		needed = true
 	default:
-		return false, fmt.Errorf("got response code %d, don't know what to do", res.StatusCode)
+		err = fmt.Errorf("got response code %d, don't know what to do", res.StatusCode)
 	}
 
-	return needed, nil
+	return
 }