diff options
Diffstat (limited to 'internal/importer')
-rw-r--r-- | internal/importer/package.go | 246 |
1 files changed, 246 insertions, 0 deletions
diff --git a/internal/importer/package.go b/internal/importer/package.go new file mode 100644 index 0000000..c0f5f93 --- /dev/null +++ b/internal/importer/package.go @@ -0,0 +1,246 @@ +package importer + +import ( + "context" + "encoding/json" + "log/slog" + "os" + "reflect" + "searchix/internal/config" + "searchix/internal/packages" + "strings" + + "github.com/bcicen/jstream" + "github.com/mitchellh/mapstructure" + "github.com/pkg/errors" +) + +type packageJSON struct { + Name string `mapstructure:"pname"` + Meta metaJSON + Version string +} + +type metaJSON struct { + Broken bool + Description string + LongDescription string + Homepages []string `mapstructure:"homepage"` + MainProgram string + Maintainers []maintainerJSON + Platforms []string + Position string +} + +type maintainerJSON struct { + Github string + Name string +} + +type PackageIngester struct { + dec *jstream.Decoder + ms *mapstructure.Decoder + pkg *packageJSON + infile *os.File + source *config.Source +} + +func makeAdhocLicense(name string) packages.License { + return packages.License{ + FullName: name, + } +} + +func makeAdhocPlatform(v any) string { + s, err := json.Marshal(v) + if err != nil { + panic("can't convert json back to json?") + } + + return string(s) +} + +func NewPackageProcessor(inpath string, source *config.Source) (*PackageIngester, error) { + infile, err := os.Open(inpath) + if err != nil { + return nil, errors.WithMessagef(err, "failed to open input file %s", inpath) + } + i := &PackageIngester{ + dec: jstream.NewDecoder(infile, 2).EmitKV(), + pkg: &packageJSON{}, + infile: infile, + source: source, + } + + ms, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + ZeroFields: true, + Result: i.pkg, + Squash: true, + DecodeHook: mapstructure.TextUnmarshallerHookFunc(), + }) + if err != nil { + defer infile.Close() + + return nil, errors.WithMessage(err, "could not create mapstructure decoder") + } + i.ms = ms + + return i, nil +} + +func convertToLicense(in map[string]any) *packages.License { + l := &packages.License{} + if v, found := in["shortName"]; found { + l.Name = v.(string) + } + if v, found := in["fullName"]; found { + l.FullName = v.(string) + } + if v, found := in["appendixUrl"]; found { + l.AppendixURL = v.(string) + } + if v, found := in["spdxId"]; found { + l.SPDXId = v.(string) + } + if v, found := in["url"]; found { + l.URL = v.(string) + } + + return l +} + +func (i *PackageIngester) Process(ctx context.Context) (<-chan *packages.Package, <-chan error) { + results := make(chan *packages.Package) + errs := make(chan error) + + go func() { + defer i.infile.Close() + defer close(results) + defer close(errs) + + userRepo := i.source.Repo.Owner + "/" + i.source.Repo.Repo + slog.Debug("starting decoder stream") + outer: + for mv := range i.dec.Stream() { + var err error + select { + case <-ctx.Done(): + break outer + default: + } + if err := i.dec.Err(); err != nil { + errs <- errors.WithMessage(err, "could not decode JSON") + + continue + } + if mv.ValueType != jstream.Object { + errs <- errors.Errorf("unexpected object type %s", ValueTypeToString(mv.ValueType)) + + continue + } + kv := mv.Value.(jstream.KV) + x := kv.Value.(map[string]interface{}) + + meta := x["meta"].(map[string]interface{}) + + var licenses []packages.License + if meta["license"] != nil { + switch v := reflect.ValueOf(meta["license"]); v.Kind() { + case reflect.Map: + licenses = append(licenses, *convertToLicense(v.Interface().(map[string]interface{}))) + case reflect.Array, reflect.Slice: + licenses = make([]packages.License, v.Len()) + for i, v := range v.Interface().([]interface{}) { + switch v := reflect.ValueOf(v); v.Kind() { + case reflect.String: + licenses[i] = makeAdhocLicense(v.String()) + case reflect.Map: + licenses[i] = *convertToLicense(v.Interface().(map[string]interface{})) + default: + errs <- errors.Errorf( + "don't know how to handle sublicense of type %s: %v", + v.Kind().String(), + v, + ) + } + } + case reflect.String: + licenses = append(licenses, makeAdhocLicense(v.String())) + default: + errs <- errors.Errorf( + "don't know how to handle license of type %s: %v", + v.Kind().String(), + meta["license"], + ) + } + delete(meta, "license") + } + + if meta["platforms"] != nil { + var plats = make([]any, len(meta["platforms"].([]any))) + for i, plat := range meta["platforms"].([]interface{}) { + switch v := reflect.ValueOf(plat); v.Kind() { + case reflect.String: + plats[i] = v.String() + case reflect.Map: + plats[i] = makeAdhocPlatform(v.Interface()) + default: + errs <- errors.Errorf( + "don't know how to convert platform type %s", + v.Kind().String(), + ) + } + } + meta["platforms"] = plats + } + if meta["homepage"] != nil { + switch v := reflect.ValueOf(meta["homepage"]); v.Kind() { + case reflect.String: + meta["homepage"] = []string{v.String()} + case reflect.Slice: + // already fine + default: + errs <- errors.Errorf( + "don't know how to interpret homepage type %s'", + v.Kind().String(), + ) + } + } + + err = i.ms.Decode(x) // stores in i.pkg + if err != nil { + errs <- errors.WithMessagef(err, "failed to decode package %#v", x) + + continue + } + + maintainers := make([]packages.Maintainer, len(i.pkg.Meta.Maintainers)) + for i, m := range i.pkg.Meta.Maintainers { + maintainers[i] = packages.Maintainer{ + Name: m.Name, + Github: m.Github, + } + } + + subpath, line, _ := strings.Cut(i.pkg.Meta.Position, ":") + + results <- &packages.Package{ + Name: i.pkg.Name, + Version: i.pkg.Version, + Meta: packages.Meta{ + Broken: i.pkg.Meta.Broken, + Description: i.pkg.Meta.Description, + LongDescription: i.pkg.Meta.LongDescription, + Homepages: i.pkg.Meta.Homepages, + Licenses: licenses, + MainProgram: i.pkg.Meta.MainProgram, + Platforms: i.pkg.Meta.Platforms, + Maintainers: maintainers, + Position: makeGitHubFileURL(userRepo, "", subpath, line), + }, + } + } + }() + + return results, errs +} |