about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Alan Pearce 2024-05-17 10:51:42 +0200
committer Alan Pearce 2024-05-17 12:21:19 +0200
commit b8da487f774aa2399b79bbde5f2e6e13be4fce1c (patch)
tree ff3379ea1ac598a025f7e70ccc6c7fc3d0b44434
parent 27b07a80f1872205dfeb949e9b90cf85f6e43744 (diff)
download searchix-b8da487f774aa2399b79bbde5f2e6e13be4fce1c.tar.lz
searchix-b8da487f774aa2399b79bbde5f2e6e13be4fce1c.tar.zst
searchix-b8da487f774aa2399b79bbde5f2e6e13be4fce1c.zip
feat: decode brotli-compressed files
-rw-r--r-- go.mod 1
-rw-r--r-- go.sum 2
-rw-r--r-- gomod2nix.toml 3
-rw-r--r-- internal/importer/main.go 14
-rw-r--r-- internal/importer/options.go 10
-rw-r--r-- internal/importer/package.go 10
-rw-r--r-- internal/importer/utils.go 41
7 files changed, 65 insertions, 16 deletions
diff --git a/go.mod b/go.mod
index 17d0ed4..b2f2ad4 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module searchix
 go 1.22.2
 
 require (
+	github.com/andybalholm/brotli v1.1.0
 	github.com/bcicen/jstream v1.0.1
 	github.com/blevesearch/bleve/v2 v2.4.0
 	github.com/blevesearch/bleve_index_api v1.1.6
diff --git a/go.sum b/go.sum
index 519ca16..2492cec 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/RoaringBitmap/roaring v1.9.3 h1:t4EbC5qQwnisr5PrP9nt0IRhRTb9gMUgQF4t4S2OByM=
 github.com/RoaringBitmap/roaring v1.9.3/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
+github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
+github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
 github.com/bcicen/jstream v1.0.1 h1:BXY7Cu4rdmc0rhyTVyT3UkxAiX3bnLpKLas9btbH5ck=
 github.com/bcicen/jstream v1.0.1/go.mod h1:9ielPxqFry7Y4Tg3j4BfjPocfJ3TbsRtXOAYXYmRuAQ=
 github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
diff --git a/gomod2nix.toml b/gomod2nix.toml
index 12e719c..10abba0 100644
--- a/gomod2nix.toml
+++ b/gomod2nix.toml
@@ -4,6 +4,9 @@ schema = 3
   [mod."github.com/RoaringBitmap/roaring"]
     version = "v1.9.3"
     hash = "sha256-LZfRufkU4UhuEcgxuCPd6divX2KIdcHp1FOt79mQV7Q="
+  [mod."github.com/andybalholm/brotli"]
+    version = "v1.1.0"
+    hash = "sha256-njLViV4v++ZdgOWGWzlvkefuFvA/nkugl3Ta/h1nu/0="
   [mod."github.com/bcicen/jstream"]
     version = "v1.0.1"
     hash = "sha256-mm+/BuIEYYj6XOHCCJLxVMKd1XcBXCiRCWA+aTvr1sE="
diff --git a/internal/importer/main.go b/internal/importer/main.go
index d2b66e1..7776482 100644
--- a/internal/importer/main.go
+++ b/internal/importer/main.go
@@ -2,6 +2,7 @@ package importer
 
 import (
 	"context"
+	"io"
 	"log/slog"
 	"os/exec"
 	"path"
@@ -59,6 +60,7 @@ func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
 				logger.Warn("could not set source repo revision", "error", err)
 			}
 
+			var file io.ReadCloser
 			var processor Processor
 			switch source.Importer {
 			case config.Options:
@@ -69,7 +71,11 @@ func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
 					"revision",
 					source.Repo.Revision,
 				)
-				processor, err = NewOptionProcessor(files.Options, source)
+				file, err = openFileDecoded(files.Options)
+				if err != nil {
+					logger.Warn("could not open file", "filename", files.Options, "error", err)
+				}
+				processor, err = NewOptionProcessor(file, source)
 			case config.Packages:
 				logger.Debug(
 					"creating processor",
@@ -78,7 +84,11 @@ func Start(cfg *config.Config, indexer *index.WriteIndex, replace bool) error {
 					"revision",
 					source.Repo.Revision,
 				)
-				processor, err = NewPackageProcessor(files.Packages, source)
+				file, err = openFileDecoded(files.Packages)
+				if err != nil {
+					logger.Warn("could not open file", "filename", files.Packages, "error", err)
+				}
+				processor, err = NewPackageProcessor(file, source)
 			}
 			if err != nil {
 				logger.Warn("failed to create processor", "type", source.Importer, "error", err)
diff --git a/internal/importer/options.go b/internal/importer/options.go
index ec2c20f..0aadc50 100644
--- a/internal/importer/options.go
+++ b/internal/importer/options.go
@@ -2,8 +2,8 @@ package importer
 
 import (
 	"context"
+	"io"
 	"log/slog"
-	"os"
 	"reflect"
 	"searchix/internal/config"
 	"searchix/internal/nix"
@@ -58,15 +58,11 @@ type OptionIngester struct {
 	dec     *jstream.Decoder
 	ms      *mapstructure.Decoder
 	optJSON nixOptionJSON
-	infile  *os.File
+	infile  io.ReadCloser
 	source  *config.Source
 }
 
-func NewOptionProcessor(inpath string, source *config.Source) (*OptionIngester, error) {
-	infile, err := os.Open(inpath)
-	if err != nil {
-		return nil, errors.WithMessagef(err, "failed to open input file %s", inpath)
-	}
+func NewOptionProcessor(infile io.ReadCloser, source *config.Source) (*OptionIngester, error) {
 	i := OptionIngester{
 		dec:     jstream.NewDecoder(infile, 1).EmitKV(),
 		optJSON: nixOptionJSON{},
diff --git a/internal/importer/package.go b/internal/importer/package.go
index 3e0ec83..a1a6c67 100644
--- a/internal/importer/package.go
+++ b/internal/importer/package.go
@@ -3,8 +3,8 @@ package importer
 import (
 	"context"
 	"encoding/json"
+	"io"
 	"log/slog"
-	"os"
 	"reflect"
 	"searchix/internal/config"
 	"searchix/internal/nix"
@@ -41,7 +41,7 @@ type PackageIngester struct {
 	dec    *jstream.Decoder
 	ms     *mapstructure.Decoder
 	pkg    *packageJSON
-	infile *os.File
+	infile io.ReadCloser
 	source *config.Source
 }
 
@@ -60,11 +60,7 @@ func makeAdhocPlatform(v any) string {
 	return string(s)
 }
 
-func NewPackageProcessor(inpath string, source *config.Source) (*PackageIngester, error) {
-	infile, err := os.Open(inpath)
-	if err != nil {
-		return nil, errors.WithMessagef(err, "failed to open input file %s", inpath)
-	}
+func NewPackageProcessor(infile io.ReadCloser, source *config.Source) (*PackageIngester, error) {
 	i := &PackageIngester{
 		dec:    jstream.NewDecoder(infile, 2).EmitKV(),
 		pkg:    &packageJSON{},
diff --git a/internal/importer/utils.go b/internal/importer/utils.go
index 3eb034f..1c0d3af 100644
--- a/internal/importer/utils.go
+++ b/internal/importer/utils.go
@@ -3,11 +3,14 @@ package importer
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"net/url"
 	"os"
+	"path"
 	"searchix/internal/config"
 	"searchix/internal/nix"
 
+	"github.com/andybalholm/brotli"
 	"github.com/bcicen/jstream"
 	"github.com/pkg/errors"
 )
@@ -80,3 +83,41 @@ func setRepoRevision(filename string, source *config.Source) error {
 
 	return nil
 }
+
+// brotliReadCloser reads brotli-decompressed data from src; Close closes src.
+type brotliReadCloser struct {
+	src io.ReadCloser
+	*brotli.Reader
+}
+
+// newBrotliReader wraps src so that reads yield its decompressed bytes.
+func newBrotliReader(src io.ReadCloser) *brotliReadCloser {
+	return &brotliReadCloser{src: src, Reader: brotli.NewReader(src)}
+}
+
+// Close closes the underlying reader; errors.Wrap keeps a nil result nil.
+func (b *brotliReadCloser) Close() error {
+	return errors.Wrap(b.src.Close(), "failed to call close on underlying reader")
+}
+
+func openFileDecoded(filename string) (io.ReadCloser, error) {
+	var reader io.ReadCloser
+	var err error
+	ext := path.Ext(filename)
+	reader, err = os.Open(filename)
+	if err != nil {
+		return nil, errors.WithMessagef(err, "failed to open file %s", filename)
+	}
+	switch ext {
+	case ".json":
+	// nothing to do
+	case ".br":
+		reader = newBrotliReader(reader)
+	default:
+		reader.Close()
+
+		return nil, errors.Errorf("invalid file extension %s", ext)
+	}
+
+	return reader, nil
+}