diff options
author | Alan Pearce | 2024-05-09 16:47:41 +0200 |
---|---|---|
committer | Alan Pearce | 2024-05-09 19:27:55 +0200 |
commit | e062ca72b222b890e345548bd8422d5df98e9fef (patch) | |
tree | 89f52ebfdb1fb8069e6323d9dde42f5491dad5d1 /internal | |
parent | 967f6fdf5c1693d3aa27079b3ae28768fb7356c6 (diff) | |
download | searchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.lz searchix-e062ca72b222b890e345548bd8422d5df98e9fef.tar.zst searchix-e062ca72b222b890e345548bd8422d5df98e9fef.zip |
feat: import sources from configuration in go code and index options
Diffstat (limited to 'internal')
-rw-r--r-- | internal/config/config.go | 22 | ||||
-rw-r--r-- | internal/importer/channel.go | 82 | ||||
-rw-r--r-- | internal/importer/http.go | 63 | ||||
-rw-r--r-- | internal/importer/importer.go | 112 | ||||
-rw-r--r-- | internal/importer/ingest.go | 237 | ||||
-rw-r--r-- | internal/importer/nixpkgs-channel.go | 82 | ||||
-rw-r--r-- | internal/importer/repository.go | 44 | ||||
-rw-r--r-- | internal/importer/source-type.go | 44 | ||||
-rw-r--r-- | internal/options/option.go | 5 | ||||
-rw-r--r-- | internal/options/process.go | 224 | ||||
-rw-r--r-- | internal/search/indexer.go | 183 | ||||
-rw-r--r-- | internal/search/search.go | 150 | ||||
-rw-r--r-- | internal/server/server.go | 70 |
13 files changed, 944 insertions, 374 deletions
diff --git a/internal/config/config.go b/internal/config/config.go index 5b06efa..2717291 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,9 +2,11 @@ package config import ( "log/slog" + "maps" "net/url" "os" "searchix/internal/file" + "searchix/internal/importer" "github.com/pelletier/go-toml/v2" "github.com/pkg/errors" @@ -28,6 +30,7 @@ type Config struct { DataPath string `toml:"data_path"` CSP CSP `toml:"content-security-policy"` Headers map[string]string + Sources map[string]importer.Source } var defaultConfig = Config{ @@ -38,6 +41,22 @@ var defaultConfig = Config{ Headers: map[string]string{ "x-content-type-options": "nosniff", }, + Sources: map[string]importer.Source{ + "nixos": importer.Source{ + Name: "NixOS", + Enable: true, + Type: importer.Channel, + Channel: "nixos-unstable", + ImportPath: "nixos/release.nix", + Attribute: "options", + OutputPath: "share/doc/nixos/options.json", + Repo: importer.Repository{ + Type: "github", + Owner: "NixOS", + Repo: "nixpkgs", + }, + }, + }, } func GetConfig() (*Config, error) { @@ -60,6 +79,9 @@ func GetConfig() (*Config, error) { return nil, errors.Wrap(err, "config error") } } + maps.DeleteFunc(config.Sources, func(_ string, v importer.Source) bool { + return !v.Enable + }) return &config, nil } diff --git a/internal/importer/channel.go b/internal/importer/channel.go new file mode 100644 index 0000000..4d051cc --- /dev/null +++ b/internal/importer/channel.go @@ -0,0 +1,82 @@ +package importer + +import ( + "context" + "fmt" + "log/slog" + "os" + "os/exec" + "path" + "searchix/internal/file" + "searchix/internal/search" + "strconv" + "strings" + + "github.com/pkg/errors" +) + +type ChannelImporter struct { + DataPath string + Source Source + SourceFile string + Logger *slog.Logger + indexPath string +} + +func (i *ChannelImporter) FetchIfNeeded(parent context.Context) (bool, error) { + ctx, cancel := context.WithTimeout(parent, i.Source.FetchTimeout) + defer cancel() + + dest := i.DataPath + + before, err := os.Readlink(dest) + if file.NeedNotExist(err) != nil { + return false, errors.WithMessagef(err, "could not call readlink on file %s", dest) + } + i.Logger.Debug("stat before", "name", before) + + args := []string{ + "--no-build-output", + "--timeout", + strconv.Itoa(int(i.Source.FetchTimeout.Seconds() - 1)), + fmt.Sprintf("<%s/%s>", i.Source.Channel, i.Source.ImportPath), + "--attr", + i.Source.Attribute, + "--out-link", + dest, + } + + i.Logger.Debug("nix-build command", "args", args) + cmd := exec.CommandContext(ctx, "nix-build", args...) + out, err := cmd.Output() + if err != nil { + return false, errors.WithMessage(err, "failed to run nix-build (--dry-run)") + } + i.Logger.Debug("nix-build", "output", strings.TrimSpace(string(out))) + + outPath := path.Join(dest, i.Source.OutputPath) + i.Logger.Debug("checking output path", "outputPath", outPath, "dest", dest, "source", i.Source.OutputPath) + after, err := os.Readlink(dest) + if err := file.NeedNotExist(err); err != nil { + return false, errors.WithMessagef(err, "failed to stat output file from nix-build, filename: %s", outPath) + } + i.Logger.Debug("stat after", "name", after) + + return before != after, nil +} + +func (i *ChannelImporter) Import(parent context.Context, indexer *search.WriteIndex) (bool, error) { + if i.Source.OutputPath == "" { + return false, errors.New("no output path specified") + } + + filename := path.Join(i.DataPath, i.SourceFile, i.Source.OutputPath) + i.Logger.Debug("preparing import run", "revision", i.Source.Repo.Revision, "filename", filename) + + return processOptions(parent, indexer, &importConfig{ + IndexPath: i.indexPath, + Source: i.Source, + Filename: filename, + Logger: i.Logger, + }) +} diff --git a/internal/importer/http.go b/internal/importer/http.go new file mode 100644 index 0000000..1bf2428 --- /dev/null +++ b/internal/importer/http.go @@ -0,0 +1,63 @@ +package importer + +import ( + "context" + "fmt" + "log/slog" + "net/http" + "os" + "searchix/internal/file" + "strings" + "time" + + "github.com/pkg/errors" +) + +func fetchFileIfNeeded(ctx context.Context, path string, url string) (needed bool, err error) { + stat, err := file.StatIfExists(path) + if err != nil { + return false, errors.WithMessagef(err, "could not stat file %s", path) + } + + var mtime string + if stat != nil { + mtime = strings.Replace(stat.ModTime().UTC().Format(time.RFC1123), "UTC", "GMT", 1) + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, http.NoBody) + if err != nil { + return false, errors.WithMessagef(err, "could not create HTTP request for %s", url) + } + + if mtime != "" { + req.Header.Set("If-Modified-Since", mtime) + } + res, err := http.DefaultClient.Do(req) + if err != nil { + return false, errors.WithMessagef(err, "could not make HTTP request to %s", url) + } + defer res.Body.Close() + + switch res.StatusCode { + case http.StatusNotModified: + needed = false + case http.StatusOK: + newMtime, err := time.Parse(time.RFC1123, res.Header.Get("Last-Modified")) + if err != nil { + slog.Warn("could not parse Last-Modified header from response", "value", res.Header.Get("Last-Modified")) + } + err = file.WriteToFile(path, res.Body) + if err != nil { + return false, errors.WithMessagef(err, "could not write response body to file %s", path) + } + err = os.Chtimes(path, time.Time{}, newMtime) + if err != nil { + slog.Warn("could not update mtime on file", "file", path) + } + needed = true + default: + return false, fmt.Errorf("got response code %d, don't know what to do", res.StatusCode) + } + + return needed, nil +} diff --git a/internal/importer/importer.go b/internal/importer/importer.go new file mode 100644 index 0000000..2318fe4 --- /dev/null +++ b/internal/importer/importer.go @@ -0,0 +1,112 @@ +package importer + +import ( + "context" + "log/slog" + "path" + "searchix/internal/search" + "sync" + "time" +) + +type Source struct { + Name string + Enable bool + Type Type + Channel string + Attribute string + ImportPath string `toml:"import-path"` + FetchTimeout time.Duration `toml:"fetch-timeout"` + ImportTimeout time.Duration `toml:"import-timeout"` + OutputPath string `toml:"output-path"` + Repo Repository +} + +type Importer interface { + FetchIfNeeded(context.Context) (bool, error) + Import(context.Context, *search.WriteIndex) (bool, error) +} + +func NewNixpkgsChannelImporter(source Source, dataPath string, logger *slog.Logger) *NixpkgsChannelImporter { + indexPath := dataPath + fullpath := path.Join(dataPath, source.Channel) + + return &NixpkgsChannelImporter{ + DataPath: fullpath, + Source: source, + Logger: logger, + indexPath: indexPath, + } +} + +func NewChannelImporter(source Source, dataPath string, logger *slog.Logger) *ChannelImporter { + indexPath := dataPath + fullpath := path.Join(dataPath, source.Channel) + + return &ChannelImporter{ + DataPath: fullpath, + Source: source, + Logger: logger, + indexPath: indexPath, + } +} + +type importConfig struct { + IndexPath string + Filename string + Source Source + Logger *slog.Logger +} + +func processOptions(parent context.Context, indexer *search.WriteIndex, conf *importConfig) (bool, error) { + ctx, cancel := context.WithTimeout(parent, conf.Source.ImportTimeout) + defer cancel() + + conf.Logger.Debug("creating option processor", "filename", conf.Filename) + processor, err := NewOptionProcessor(conf.Filename, conf.Source) + if err != nil { + return true, err + } + + wg := sync.WaitGroup{} + + wg.Add(1) + options, pErrs := processor.Process(ctx) + + wg.Add(1) + iErrs := indexer.ImportOptions(ctx, options) + + var hadErrors bool + go func() { + for { + select { + case err, running := <-iErrs: + if !running { + wg.Done() + iErrs = nil + slog.Info("ingest completed") + + continue + } + hadErrors = true + conf.Logger.Warn("error ingesting option", "error", err) + case err, running := <-pErrs: + if !running { + wg.Done() + pErrs = nil + slog.Info("processing completed") + + continue + } + hadErrors = true + conf.Logger.Warn("error processing option", "error", err) + } + } + }() + + slog.Debug("options processing", "state", "waiting") + wg.Wait() + slog.Debug("options processing", "state", "complete") + + return hadErrors, nil +} diff --git a/internal/importer/ingest.go b/internal/importer/ingest.go new file mode 100644 index 0000000..b9db80c --- /dev/null +++ b/internal/importer/ingest.go @@ -0,0 +1,237 @@ +package importer + +import ( + "context" + "fmt" + "log/slog" + "net/url" + "os" + "reflect" + "searchix/internal/options" + "strings" + + "github.com/bcicen/jstream" + "github.com/mitchellh/mapstructure" + "github.com/pkg/errors" +) + +type nixValueJSON struct { + Type string `mapstructure:"_type"` + Text string +} + +type linkJSON struct { + Name string + URL string `json:"url"` +} + +type nixOptionJSON struct { + Declarations []linkJSON + Default *nixValueJSON + Description string + Example *nixValueJSON + Loc []string + ReadOnly bool + RelatedPackages string + Type string +} + +func ValueTypeToString(valueType jstream.ValueType) string { + switch valueType { + case jstream.Unknown: + return "unknown" + case jstream.Null: + return "null" + case jstream.String: + return "string" + case jstream.Number: + return "number" + case jstream.Boolean: + return "boolean" + case jstream.Array: + return "array" + case jstream.Object: + return "object" + } + + return "very strange" +} + +func makeGitHubFileURL(userRepo string, ref string, subPath string) string { + url, _ := url.JoinPath("https://github.com/", userRepo, "blob", ref, subPath) + + return url +} + +// make configurable? +var channelRepoMap = map[string]string{ + "nixpkgs": "NixOS/nixpkgs", + "nix-darwin": "LnL7/nix-darwin", + "home-manager": "nix-community/home-manager", +} + +func MakeChannelLink(channel string, ref string, subPath string) (*options.Link, error) { + if channelRepoMap[channel] == "" { + return nil, fmt.Errorf("don't know what repository relates to channel <%s>", channel) + } + + return &options.Link{ + Name: fmt.Sprintf("<%s/%s>", channel, subPath), + URL: makeGitHubFileURL(channelRepoMap[channel], ref, subPath), + }, nil +} + +func convertNixValue(nj *nixValueJSON) *options.NixValue { + if nj == nil { + return nil + } + switch nj.Type { + case "", "literalExpression": + return &options.NixValue{ + Text: nj.Text, + } + case "literalMD": + return &options.NixValue{ + Markdown: options.Markdown(nj.Text), + } + default: + slog.Warn("got unexpected NixValue type", "type", nj.Type, "text", nj.Text) + + return nil + } +} + +type OptionIngester struct { + dec *jstream.Decoder + ms *mapstructure.Decoder + optJSON nixOptionJSON + infile *os.File + source Source +} + +type Ingester[T options.NixOption] interface { + Process() (<-chan *T, <-chan error) +} + +func NewOptionProcessor(inpath string, source Source) (*OptionIngester, error) { + infile, err := os.Open(inpath) + if err != nil { + return nil, errors.WithMessagef(err, "failed to open input file %s", inpath) + } + i := OptionIngester{ + dec: jstream.NewDecoder(infile, 1).EmitKV(), + optJSON: nixOptionJSON{}, + infile: infile, + source: source, + } + + ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + ErrorUnused: true, + ZeroFields: true, + Result: &i.optJSON, + Squash: true, + DecodeHook: mapstructure.TextUnmarshallerHookFunc(), + }) + if err != nil { + defer infile.Close() + + return nil, errors.WithMessage(err, "could not create mapstructure decoder") + } + i.ms = ms + + return &i, nil +} + +func (i *OptionIngester) Process(ctx context.Context) (<-chan *options.NixOption, <-chan error) { + results := make(chan *options.NixOption) + errs := make(chan error) + + go func() { + defer i.infile.Close() + defer close(results) + defer close(errs) + + slog.Debug("starting decoder stream") + outer: + for mv := range i.dec.Stream() { + select { + case <-ctx.Done(): + break outer + default: + } + if err := i.dec.Err(); err != nil { + errs <- errors.WithMessage(err, "could not decode JSON") + + continue + } + if mv.ValueType != jstream.Object { + errs <- errors.Errorf("unexpected object type %s", ValueTypeToString(mv.ValueType)) + + continue + } + kv := mv.Value.(jstream.KV) + x := kv.Value.(map[string]interface{}) + + var decls []*options.Link + for _, decl := range x["declarations"].([]interface{}) { + i.optJSON = nixOptionJSON{} + + switch decl := reflect.ValueOf(decl); decl.Kind() { + case reflect.String: + s := decl.String() + link, err := MakeChannelLink(i.source.Channel, i.source.Repo.Revision, s) + if err != nil { + errs <- errors.WithMessagef(err, + "could not make a channel link for channel %s, revision %s and subpath %s", + i.source.Channel, i.source.Repo.Revision, s, + ) + + continue + } + decls = append(decls, link) + case reflect.Map: + v := decl.Interface().(map[string]interface{}) + link := options.Link{ + Name: v["name"].(string), + URL: v["url"].(string), + } + decls = append(decls, &link) + default: + errs <- errors.Errorf("unexpected declaration type %s", decl.Kind().String()) + + continue + } + } + if len(decls) > 0 { + x["declarations"] = decls + } + + err := i.ms.Decode(x) // stores in optJSON + if err != nil { + errs <- errors.WithMessagef(err, "failed to decode option %#v", x) + + continue + } + + var decs = make([]options.Link, len(i.optJSON.Declarations)) + for i, d := range i.optJSON.Declarations { + decs[i] = options.Link(d) + } + + // slog.Debug("sending option", "name", kv.Key) + results <- &options.NixOption{ + Option: kv.Key, + Source: strings.ToLower(i.source.Name), + Declarations: decs, + Default: convertNixValue(i.optJSON.Default), + Description: options.Markdown(i.optJSON.Description), + Example: convertNixValue(i.optJSON.Example), + RelatedPackages: options.Markdown(i.optJSON.RelatedPackages), + Loc: i.optJSON.Loc, + Type: i.optJSON.Type, + } + } + }() + + return results, errs +} diff --git a/internal/importer/nixpkgs-channel.go b/internal/importer/nixpkgs-channel.go new file mode 100644 index 0000000..0e5be62 --- /dev/null +++ b/internal/importer/nixpkgs-channel.go @@ -0,0 +1,82 @@ +package importer + +import ( + "bytes" + "context" + "log/slog" + "net/url" + "os" + "path" + "searchix/internal/file" + "searchix/internal/search" + + "github.com/pkg/errors" +) + +type NixpkgsChannelImporter struct { + DataPath string + Source Source + Logger *slog.Logger + indexPath string +} + +func makeChannelURL(channel string, subPath string) (string, error) { + url, err := url.JoinPath("https://channels.nixos.org/", channel, subPath) + + return url, errors.WithMessagef(err, "error creating URL") +} + +var filesToFetch = map[string]string{ + "revision": "git-revision", + "options": "options.json.br", +} + +func (i *NixpkgsChannelImporter) FetchIfNeeded(parent context.Context) (bool, error) { + ctx, cancel := context.WithTimeout(parent, i.Source.FetchTimeout) + defer cancel() + + root := i.DataPath + + err := file.Mkdirp(root) + if err != nil { + return false, errors.WithMessagef(err, "error creating directory for data: %s", root) + } + + for _, filename := range filesToFetch { + url, err := makeChannelURL(i.Source.Channel, filename) + if err != nil { + return false, err + } + + path := path.Join(root, filename) + + updated, err := fetchFileIfNeeded(ctx, path, url) + if err != nil { + return false, err + } + // don't bother to issue requests for the later files + if !updated { + return false, err + } + } + + return true, nil +} + +func (i *NixpkgsChannelImporter) Import(parent context.Context, indexer *search.WriteIndex) (bool, error) { + filename := path.Join(i.DataPath, filesToFetch["options"]) + revFilename := path.Join(i.DataPath, filesToFetch["revision"]) + bits, err := os.ReadFile(revFilename) + if err != nil { + return false, errors.WithMessagef(err, "unable to read revision file at %s", revFilename) + } + i.Source.Repo.Revision = string(bytes.TrimSpace(bits)) + i.Logger.Debug("preparing import run", "revision", i.Source.Repo.Revision, "filename", filename) + + return processOptions(parent, indexer, &importConfig{ + IndexPath: i.indexPath, + Source: i.Source, + Filename: filename, + Logger: i.Logger, + }) +} diff --git a/internal/importer/repository.go b/internal/importer/repository.go new file mode 100644 index 0000000..6cfd55e --- /dev/null +++ b/internal/importer/repository.go @@ -0,0 +1,44 @@ +package importer + +import ( + "fmt" + "strings" +) + +type RepoType int + +const ( + GitHub = iota + 1 +) + +type Repository struct { + Type string `default:"github"` + Owner string + Repo string + Revision string +} + +func (f RepoType) String() string { + switch f { + case GitHub: + return "github" + default: + return fmt.Sprintf("RepoType(%d)", f) + } +} + +func parseRepoType(name string) (RepoType, error) { + switch strings.ToLower(name) { + case "github": + return GitHub, nil + default: + return Unknown, fmt.Errorf("unsupported repo type %s", name) + } +} + +func (f *RepoType) UnmarshalText(text []byte) error { + var err error + *f, err = parseRepoType(string(text)) + + return err +} diff --git a/internal/importer/source-type.go b/internal/importer/source-type.go new file mode 100644 index 0000000..5d84547 --- /dev/null +++ b/internal/importer/source-type.go @@ -0,0 +1,44 @@ +package importer + +import ( + "fmt" + + "github.com/stoewer/go-strcase" +) + +type Type int + +const ( + Unknown = iota + Channel + ChannelNixpkgs +) + +func (f Type) String() string { + switch f { + case Channel: + return "channel" + case ChannelNixpkgs: + return "channel-nixpkgs" + } + + return fmt.Sprintf("Fetcher(%d)", f) +} + +func parseType(name string) (Type, error) { + switch strcase.KebabCase(name) { + case "channel": + return Channel, nil + case "channel-nixpkgs": + return ChannelNixpkgs, nil + default: + return Unknown, fmt.Errorf("unsupported fetcher %s", name) + } +} + +func (f *Type) UnmarshalText(text []byte) error { + var err error + *f, err = parseType(string(text)) + + return err +} diff --git a/internal/options/option.go b/internal/options/option.go index a43dd49..b7fe818 100644 --- a/internal/options/option.go +++ b/internal/options/option.go @@ -14,6 +14,7 @@ type Link struct { type NixOption struct { Option string + Source string Declarations []Link Default *NixValue `json:",omitempty"` @@ -24,4 +25,6 @@ type NixOption struct { Type string } -type NixOptions []NixOption +func (*NixOption) BleveType() string { + return "option" +} diff --git a/internal/options/process.go b/internal/options/process.go deleted file mode 100644 index 4e7c664..0000000 --- a/internal/options/process.go +++ /dev/null @@ -1,224 +0,0 @@ -package options - -import ( - "encoding/json" - "fmt" - "io" - "log/slog" - "net/url" - "os" - "reflect" - - "github.com/bcicen/jstream" - "github.com/mitchellh/mapstructure" - "github.com/pkg/errors" -) - -type nixValueJSON struct { - Type string `mapstructure:"_type"` - Text string -} - -type linkJSON struct { - Name string - URL string `json:"url"` -} - -type nixOptionJSON struct { - Declarations []linkJSON - Default *nixValueJSON - Description string - Example *nixValueJSON - Loc []string - ReadOnly bool - RelatedPackages string - Type string -} - -func ValueTypeToString(valueType jstream.ValueType) string { - switch valueType { - case jstream.Unknown: - return "unknown" - case jstream.Null: - return "null" - case jstream.String: - return "string" - case jstream.Number: - return "number" - case jstream.Boolean: - return "boolean" - case jstream.Array: - return "array" - case jstream.Object: - return "object" - } - - return "very strange" -} - -func makeGitHubFileURL(userRepo string, ref string, subPath string) string { - url, _ := url.JoinPath("https://github.com/", userRepo, "blob", ref, subPath) - - return url -} - -// make configurable? -var channelRepoMap = map[string]string{ - "nixpkgs": "NixOS/nixpkgs", - "nix-darwin": "LnL7/nix-darwin", - "home-manager": "nix-community/home-manager", -} - -func MakeChannelLink(channel string, ref string, subPath string) (*Link, error) { - if channelRepoMap[channel] == "" { - return nil, fmt.Errorf("don't know what repository relates to channel <%s>", channel) - } - - return &Link{ - Name: fmt.Sprintf("<%s/%s>", channel, subPath), - URL: makeGitHubFileURL(channelRepoMap[channel], ref, subPath), - }, nil -} - -func convertNixValue(nj *nixValueJSON) *NixValue { - if nj == nil { - return nil - } - switch nj.Type { - case "", "literalExpression": - return &NixValue{ - Text: nj.Text, - } - case "literalMD": - return &NixValue{ - Markdown: Markdown(nj.Text), - } - default: - slog.Warn("got unexpected NixValue type", "type", nj.Type, "text", nj.Text) - - return nil - } -} - -func Process(inpath string, outpath string, channel string, revision string) error { - infile, err := os.Open(inpath) - if err != nil { - return errors.WithMessagef(err, "failed to open input file %s", inpath) - } - defer infile.Close() - outfile, err := os.Create(outpath) - if err != nil { - return errors.WithMessagef(err, "failed to open output file %s", outpath) - } - if outpath != "/dev/stdout" { - defer outfile.Close() - } - - dec := jstream.NewDecoder(infile, 1).EmitKV() - var optJSON nixOptionJSON - ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ - ErrorUnused: true, - ZeroFields: true, - Result: &optJSON, - Squash: true, - DecodeHook: mapstructure.TextUnmarshallerHookFunc(), - }) - if err != nil { - return errors.WithMessage(err, "could not create mapstructure decoder") - } - - _, err = outfile.WriteString("[\n") - if err != nil { - return errors.WithMessage(err, "could not write to output") - } - for mv := range dec.Stream() { - if err := dec.Err(); err != nil { - return errors.WithMessage(err, "could not decode JSON") - } - if mv.ValueType != jstream.Object { - return errors.Errorf("unexpected object type %s", ValueTypeToString(mv.ValueType)) - } - kv := mv.Value.(jstream.KV) - x := kv.Value.(map[string]interface{}) - - var decls []*Link - for _, decl := range x["declarations"].([]interface{}) { - optJSON = nixOptionJSON{} - - switch decl := reflect.ValueOf(decl); decl.Kind() { - case reflect.String: - s := decl.String() - link, err := MakeChannelLink(channel, revision, s) - if err != nil { - return errors.WithMessagef(err, - "could not make a channel link for channel %s, revision %s and subpath %s", - channel, revision, s, - ) - } - decls = append(decls, link) - case reflect.Map: - v := decl.Interface().(map[string]interface{}) - link := Link{ - Name: v["name"].(string), - URL: v["url"].(string), - } - decls = append(decls, &link) - default: - println("kind", decl.Kind().String()) - panic("unexpected object type") - } - } - if len(decls) > 0 { - x["declarations"] = decls - } - - err = ms.Decode(x) // stores in optJSON - if err != nil { - return errors.WithMessagef(err, "failed to decode option %#v", x) - } - - var decs = make([]Link, len(optJSON.Declarations)) - for i, d := range optJSON.Declarations { - decs[i] = Link(d) - } - - opt := NixOption{ - Option: kv.Key, - Declarations: decs, - Default: convertNixValue(optJSON.Default), - Description: Markdown(optJSON.Description), - Example: convertNixValue(optJSON.Example), - RelatedPackages: Markdown(optJSON.RelatedPackages), - Loc: optJSON.Loc, - Type: optJSON.Type, - } - - b, err := json.MarshalIndent(opt, "", " ") - if err != nil { - return errors.WithMessagef(err, "failed to encode option %#v", opt) - } - - _, err = outfile.Write(b) - if err != nil { - return errors.WithMessage(err, "failed to write to output") - } - _, err = outfile.WriteString(",\n") - if err != nil { - return errors.WithMessage(err, "failed to write to output") - } - } - - if outpath != "/dev/stdout" { - _, err = outfile.Seek(-2, io.SeekCurrent) - if err != nil { - return errors.WithMessage(err, "could not write to output") - } - } - - _, err = outfile.WriteString("\n]\n") - if err != nil { - return errors.WithMessage(err, "could not write to output") - } - - return nil -} diff --git a/internal/search/indexer.go b/internal/search/indexer.go new file mode 100644 index 0000000..b0e57d4 --- /dev/null +++ b/internal/search/indexer.go @@ -0,0 +1,183 @@ +package search + +import ( + "bytes" + "context" + "encoding/gob" + "log" + "log/slog" + "path" + "searchix/internal/options" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" + "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword" + "github.com/blevesearch/bleve/v2/analysis/analyzer/web" + "github.com/blevesearch/bleve/v2/analysis/token/camelcase" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" + "github.com/blevesearch/bleve/v2/analysis/tokenizer/single" + "github.com/blevesearch/bleve/v2/document" + "github.com/blevesearch/bleve/v2/mapping" + index "github.com/blevesearch/bleve_index_api" + "github.com/pkg/errors" +) + +type WriteIndex struct { + index bleve.Index + indexMapping *mapping.IndexMappingImpl +} + +func NewIndexer(dir string) (*WriteIndex, error) { + var err error + bleve.SetLog(log.Default()) + + indexPath := path.Join(dir, indexFilename) + + indexMapping := bleve.NewIndexMapping() + indexMapping.StoreDynamic = false + indexMapping.IndexDynamic = false + indexMapping.TypeField = "BleveType" + + textFieldMapping := bleve.NewTextFieldMapping() + textFieldMapping.Store = false + + descriptionFieldMapping := bleve.NewTextFieldMapping() + descriptionFieldMapping.Store = false + descriptionFieldMapping.Analyzer = web.Name + + err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ + "type": custom.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("loc", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": letter.Name, + "token_filters": []string{ + camelcase.Name, + }, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + err = indexMapping.AddCustomAnalyzer("keyword_single", map[string]interface{}{ + "type": keyword.Name, + "tokenizer": single.Name, + }) + if err != nil { + return nil, errors.WithMessage(err, "could not add custom analyser") + } + + keywordFieldMapping := bleve.NewKeywordFieldMapping() + keywordFieldMapping.Analyzer = "keyword_single" + + nameMapping := bleve.NewTextFieldMapping() + nameMapping.Analyzer = "option_name" + nameMapping.IncludeTermVectors = true + nameMapping.Store = false + + nixValueMapping := bleve.NewDocumentStaticMapping() + nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) + nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) + + locFieldMapping := bleve.NewKeywordFieldMapping() + locFieldMapping.Analyzer = "loc" + locFieldMapping.IncludeTermVectors = true + locFieldMapping.Store = false + + optionMapping := bleve.NewDocumentStaticMapping() + + optionMapping.AddFieldMappingsAt("Option", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Source", keywordFieldMapping) + optionMapping.AddFieldMappingsAt("Loc", locFieldMapping) + optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) + optionMapping.AddFieldMappingsAt("Description", textFieldMapping) + + optionMapping.AddSubDocumentMapping("Default", nixValueMapping) + optionMapping.AddSubDocumentMapping("Example", nixValueMapping) + + indexMapping.AddDocumentMapping("option", optionMapping) + + idx, err := bleve.New(indexPath, indexMapping) + if err != nil { + return nil, errors.WithMessagef(err, "unable to create index at path %s", indexPath) + } + + return &WriteIndex{ + idx, + indexMapping, + }, nil +} + +func (i *WriteIndex) ImportOptions(ctx context.Context, objects <-chan *options.NixOption) <-chan error { + var err error + errs := make(chan error) + + go func() { + defer close(errs) + batch := i.index.NewBatch() + + outer: + for opt := range objects { + select { + case <-ctx.Done(): + slog.Debug("context cancelled") + + break outer + default: + } + + doc := document.NewDocument(opt.Source + "/" + opt.Option) + err = i.indexMapping.MapDocument(doc, opt) + if err != nil { + errs <- errors.WithMessagef(err, "could not map document for option: %s", opt.Option) + + continue + } + + var data bytes.Buffer + enc := gob.NewEncoder(&data) + err = enc.Encode(opt) + if err != nil { + errs <- errors.WithMessage(err, "could not store option in search index") + + continue + } + field := document.NewTextFieldWithIndexingOptions("_data", nil, data.Bytes(), index.StoreField) + newDoc := doc.AddField(field) + + // slog.Debug("adding option to index", "name", opt.Option) + err = batch.IndexAdvanced(newDoc) + + if err != nil { + errs <- errors.WithMessagef(err, "could not index option %s", opt.Option) + + continue + } + } + + size := batch.Size() + slog.Debug("flushing batch", "size", size) + + err := i.index.Batch(batch) + if err != nil { + errs <- errors.WithMessagef(err, "could not flush batch") + } + }() + + return errs +} + +func (i *WriteIndex) Close() error { + err := i.index.Close() + if err != nil { + return errors.WithMessagef(err, "could not close index") + } + + return nil +} diff --git a/internal/search/search.go b/internal/search/search.go index 97d8404..92afdfb 100644 --- a/internal/search/search.go +++ b/internal/search/search.go @@ -4,151 +4,73 @@ import ( "bytes" "context" "encoding/gob" - "log" - "os" "path" "searchix/internal/options" - "github.com/bcicen/jstream" "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" - "github.com/blevesearch/bleve/v2/analysis/token/camelcase" - "github.com/blevesearch/bleve/v2/analysis/tokenizer/letter" - "github.com/blevesearch/bleve/v2/document" "github.com/blevesearch/bleve/v2/search" - index "github.com/blevesearch/bleve_index_api" - "github.com/mitchellh/mapstructure" "github.com/pkg/errors" ) const ResultsPerPage = 20 +const indexFilename = "index.bleve" -type DocumentMatch[T options.NixOption] struct { +type DocumentMatch struct { search.DocumentMatch - Data T + Data options.NixOption } -type Result[T options.NixOption] struct { +type Result struct { *bleve.SearchResult - Hits []DocumentMatch[T] + Hits []DocumentMatch } -type Index[T options.NixOption] struct { +type ReadIndex struct { index bleve.Index } -func New[T options.NixOption](kind string) (*Index[T], error) { - var err error - bleve.SetLog(log.Default()) +func Open(dir string) (*ReadIndex, error) { + indexPath := path.Join(dir, indexFilename) - indexMapping := bleve.NewIndexMapping() - - textFieldMapping := bleve.NewTextFieldMapping() - textFieldMapping.Store = false - - descriptionFieldMapping := bleve.NewTextFieldMapping() - descriptionFieldMapping.Store = false - descriptionFieldMapping.Analyzer = "web" - - err = indexMapping.AddCustomAnalyzer("option_name", map[string]interface{}{ - "type": custom.Name, - "tokenizer": letter.Name, - "token_filters": []string{ - camelcase.Name, - }, - }) + idx, err := bleve.Open(indexPath) if err != nil { - return nil, errors.WithMessage(err, "could not add custom analyser") + return nil, errors.WithMessagef(err, "unable to open index at path %s", indexPath) } - nameMapping := bleve.NewTextFieldMapping() - nameMapping.Analyzer = "option_name" - nameMapping.IncludeTermVectors = true - nameMapping.Store = false - - nixValueMapping := bleve.NewDocumentStaticMapping() - nixValueMapping.AddFieldMappingsAt("Text", textFieldMapping) - nixValueMapping.AddFieldMappingsAt("Markdown", textFieldMapping) - - optionMapping := bleve.NewDocumentStaticMapping() - - optionMapping.AddFieldMappingsAt("Option", nameMapping) - optionMapping.AddFieldMappingsAt("Loc", bleve.NewKeywordFieldMapping()) - optionMapping.AddFieldMappingsAt("RelatedPackages", textFieldMapping) - optionMapping.AddFieldMappingsAt("Description", textFieldMapping) - - optionMapping.AddSubDocumentMapping("Default", nixValueMapping) - optionMapping.AddSubDocumentMapping("Example", nixValueMapping) - - indexMapping.AddDocumentMapping("option", optionMapping) - - idx, err := bleve.NewMemOnly(indexMapping) - // index, err = bleve.New(path.Join(cfg.DataPath, const indexFilename = "index.bleve"), indexMapping) - - if err != nil { - return nil, errors.WithMessage(err, "error opening index") - } - batch := idx.NewBatch() - - jsonFile, err := os.Open(path.Join("data", "processed", kind+".json")) - if err != nil { - return nil, errors.WithMessage(err, "error opening json file") - } - - dec := jstream.NewDecoder(jsonFile, 1) - var opt options.NixOption - ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ - ErrorUnused: true, - ZeroFields: true, - Result: &opt, - }) - if err != nil { - return nil, errors.WithMessage(err, "could not create struct decoder") - } - for mv := range dec.Stream() { - opt = options.NixOption{} - orig := mv.Value.(map[string]interface{}) - err := ms.Decode(orig) // stores in opt - if err != nil { - return nil, errors.WithMessagef(err, "could not decode value: %+v", orig) - } - doc := document.NewDocument(opt.Option) - err = indexMapping.MapDocument(doc, opt) - if err != nil { - return nil, errors.WithMessagef(err, "could not map document for option: %s", opt.Option) - } + return &ReadIndex{ + idx, + }, nil +} - var data bytes.Buffer - enc := gob.NewEncoder(&data) - err = enc.Encode(opt) - if err != nil { - return nil, errors.WithMessage(err, "could not store option in search index") - } - field := document.NewTextFieldWithIndexingOptions("data", nil, data.Bytes(), index.StoreField) - newDoc := doc.AddField(field) +func (index *ReadIndex) GetSource(ctx context.Context, name string) (*bleve.SearchResult, error) { + query := bleve.NewTermQuery(name) + query.SetField("Source") + search := bleve.NewSearchRequest(query) - err = batch.IndexAdvanced(newDoc) + result, err := index.index.SearchInContext(ctx, search) + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: if err != nil { - return nil, errors.WithMessagef(err, "could not index option %s", opt.Option) + return nil, errors.WithMessagef(err, "failed to execute search to find source %s in index", name) } } - err = idx.Batch(batch) - if err != nil { - return nil, errors.WithMessage(err, "failed to run batch index operation") - } - return &Index[T]{ - idx, - }, nil + return result, nil } -func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) (*Result[T], error) { - query := bleve.NewMatchQuery(keyword) - query.Analyzer = "option_name" +func (index *ReadIndex) Search(ctx context.Context, source string, keyword string, from uint64) (*Result, error) { + sourceQuery := bleve.NewTermQuery(source) + userQuery := bleve.NewMatchQuery(keyword) + userQuery.Analyzer = "option_name" + + query := bleve.NewConjunctionQuery(sourceQuery, userQuery) + search := bleve.NewSearchRequest(query) search.Size = ResultsPerPage - search.Fields = []string{"data"} + search.Fields = []string{"_data"} search.Explain = true if from != 0 { @@ -164,10 +86,10 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) return nil, errors.WithMessage(err, "failed to execute search query") } - results := make([]DocumentMatch[T], min(ResultsPerPage, bleveResult.Total)) + results := make([]DocumentMatch, min(ResultsPerPage, bleveResult.Total)) var buf bytes.Buffer for i, result := range bleveResult.Hits { - _, err = buf.WriteString(result.Fields["data"].(string)) + _, err = buf.WriteString(result.Fields["_data"].(string)) if err != nil { return nil, errors.WithMessage(err, "error fetching result data") } @@ -178,7 +100,7 @@ func (index *Index[T]) Search(ctx context.Context, keyword string, from uint64) buf.Reset() } - return &Result[T]{ + return &Result{ SearchResult: bleveResult, Hits: results, }, nil diff --git a/internal/server/server.go b/internal/server/server.go index b794f05..5def347 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -20,6 +20,7 @@ import ( "searchix/internal/options" "searchix/internal/search" + "github.com/blevesearch/bleve/v2" "github.com/getsentry/sentry-go" sentryhttp "github.com/getsentry/sentry-go/http" "github.com/osdevisnot/sorvor/pkg/livereload" @@ -57,17 +58,18 @@ type Server struct { const jsSnippet = template.HTML(livereload.JsSnippet) // #nosec G203 type TemplateData struct { - LiveReload template.HTML - Source string - Query string - Results bool + LiveReload template.HTML + Source string + Query string + Results bool + SourceResult *bleve.SearchResult } type ResultData[T options.NixOption] struct { TemplateData Query string ResultsPerPage int - Results *search.Result[T] + Results *search.Result Prev string Next string } @@ -77,24 +79,6 @@ func applyDevModeOverrides(config *cfg.Config) { config.CSP.ConnectSrc = slices.Insert(config.CSP.ConnectSrc, 0, "'self'") } -var index = map[string]*search.Index[options.NixOption]{} - -var sourceFileName = map[string]string{ - "darwin": "darwin-options", - "home-manager": "home-manager-options", - "nixos": "nixos-options-nixos-unstable", -} - -func makeIndex(source string, filename string) { - var err error - slog.Info("loading index", "index", source) - index[source], err = search.New(filename) - slog.Info("loaded index", "index", source) - if err != nil { - log.Fatalf("could not build search index, error: %#v", err) - } -} - func New(runtimeConfig *Config) (*Server, error) { var err error config, err = cfg.GetConfig() @@ -102,6 +86,13 @@ func New(runtimeConfig *Config) (*Server, error) { return nil, errors.WithMessage(err, "error parsing configuration file") } + slog.Debug("loading index") + index, err := search.Open(config.DataPath) + slog.Debug("loaded index") + if err != nil { + log.Fatalf("could not open search index, error: %#v", err) + } + env := "development" if runtimeConfig.Production { env = "production" @@ -138,19 +129,34 @@ func New(runtimeConfig *Config) (*Server, error) { } }) + const getSourceTimeout = 1 * time.Second mux.HandleFunc("/options/{source}/search", func(w http.ResponseWriter, r *http.Request) { source := r.PathValue("source") - if index[source] == nil { + ctx, cancel := context.WithTimeout(context.Background(), getSourceTimeout) + defer cancel() + + sourceResult, err := index.GetSource(ctx, source) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + + return + } + + if sourceResult.Total == 0 { http.Error(w, "Unknown source", http.StatusNotFound) return } - err := templates["search"].Execute(w, TemplateData{ - LiveReload: jsSnippet, - Source: source, + + err = templates["search"].Execute(w, TemplateData{ + LiveReload: jsSnippet, + Source: source, + SourceResult: sourceResult, }) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) + + return } }) @@ -159,7 +165,7 @@ func New(runtimeConfig *Config) (*Server, error) { source := r.PathValue("source") ctx, cancel := context.WithTimeoutCause(r.Context(), timeout, errors.New("timeout")) defer cancel() - if index[source] == nil { + if index == nil { http.Error(w, "Unknown source", http.StatusNotFound) return @@ -173,7 +179,7 @@ func New(runtimeConfig *Config) (*Server, error) { http.Error(w, "Bad query string", http.StatusBadRequest) } } - results, err := index[source].Search(ctx, qs, (page-1)*search.ResultsPerPage) + results, err := index.Search(ctx, source, qs, (page-1)*search.ResultsPerPage) if err != nil { if err == context.DeadlineExceeded { http.Error(w, "Search timed out", http.StatusInternalServerError) @@ -238,12 +244,6 @@ func New(runtimeConfig *Config) (*Server, error) { mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("frontend/static")))) - go func() { - for source, filename := range sourceFileName { - makeIndex(source, filename) - } - }() - if runtimeConfig.LiveReload { applyDevModeOverrides(config) liveReload := livereload.New() |