Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(linter): expand package checks across multiple ecosystems #322

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
262 changes: 30 additions & 232 deletions tools/osv-linter/internal/pkgchecker/ecosystems.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,34 @@ package pkgchecker

import (
"fmt"
"io"
"net/http"
"regexp"
"slices"
"strings"

"golang.org/x/mod/module"
"golang.org/x/mod/semver"

"github.com/ossf/osv-schema/linter/internal/faulttolerant"

pep440 "github.com/aquasecurity/go-pep440-version"

"github.com/tidwall/gjson"
)

// SupportedEcosystems lists ecosystem names, spelled exactly as they are
// matched elsewhere in this package (e.g. "crates.io", "npm").
// Ecosystem support is a work in progress.
// NOTE(review): presumably these are the ecosystems for which real package
// checks are implemented — verify against the callers of this variable.
var SupportedEcosystems = []string{
"Go",
"PyPI",
"crates.io",
"npm",
"NuGet",
"RubyGems",
"Packagist",
"Pub",
"Hackage",
"Maven",
}

// EcosystemBaseURLs maps ecosystems to their base API URLs.
var EcosystemBaseURLs = map[string]string{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's an opportunity to DRY this out a bit, and we can use the keys of EcosystemBaseURLs to infer what is now duplicated into SupportedEcosystems.

"Go": "https://proxy.golang.org",
"PyPI": "https://pypi.org/pypi",
"crates.io": "https://crates.io/api/v1/crates",
"npm": "https://registry.npmjs.org",
"NuGet": "https://api.nuget.org/v3-flatcontainer",
"RubyGems": "https://rubygems.org/api/v1/gems",
"Packagist": "https://repo.packagist.org/p2",
"Pub": "https://pub.dev/api/packages",
"Hackage": "https://hackage.haskell.org/package",
"Maven": "https://search.maven.org/solrsearch/select",
hogo6002 marked this conversation as resolved.
Show resolved Hide resolved
}

// Dispatcher for ecosystem-specific package existence checking.
Expand All @@ -40,7 +48,7 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool {
case "CRAN":
return true
case "crates.io":
return true
return existsInCrates(pkg)
case "Debian":
return true
case "GIT":
Expand All @@ -52,33 +60,33 @@ func ExistsInEcosystem(pkg string, ecosystem string) bool {
case "GSD":
return true
case "Hackage":
return true
return existsInHackage(pkg)
case "Hex":
return true
case "Linux":
return true
case "Maven":
return true
return existsInMaven(pkg)
case "npm":
return true
return existsInNpm(pkg)
case "NuGet":
return true
return existsInNuget(pkg)
case "openSUSE":
return true
case "OSS-Fuzz":
return true
case "Packagist":
return true
return existsInPackagist(pkg)
case "Pub":
return true
return existsInPub(pkg)
case "PyPI":
return existsInPyPI(pkg)
case "Red Hat":
return true
case "Rocky Linux":
return true
case "RubyGems":
return true
return existsInRubyGems(pkg)
case "SUSE":
return true
case "SwiftURL":
Expand Down Expand Up @@ -173,213 +181,3 @@ func VersionsExistInEcosystem(pkg string, versions []string, ecosystem string) e
}
return fmt.Errorf("unsupported ecosystem: %s", ecosystem)
}

// existsInPyPI reports whether pkg exists in PyPI.
//
// It issues a HEAD request for the package's JSON document at
// https://pypi.org/pypi/<lowercased pkg>/json and returns true only when the
// response status is 200 OK. A request error is reported as false.
//
// NOTE(review): versionsExistInPyPI additionally collapses runs of "-", "_"
// and "." into "-" before querying; this function only lowercases the name.
// Confirm whether the difference is intentional.
func existsInPyPI(pkg string) bool {
	packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", strings.ToLower(pkg))

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Head(packageInstanceURL)
	if err != nil {
		return false
	}
	// Close the body so the underlying connection is released. Presumably resp
	// is an *http.Response, as with faulttolerant.Get elsewhere in this file.
	if resp.Body != nil {
		defer resp.Body.Close()
	}

	return resp.StatusCode == http.StatusOK
}

// pypiNameSeparators matches the runs of "-", "_" and "." that PyPI name
// normalization collapses into a single "-".
// https://packaging.python.org/en/latest/specifications/name-normalization/
var pypiNameSeparators = regexp.MustCompile(`[-_.]+`)

// versionsExistInPyPI confirms that all specified versions of a package exist
// in PyPI.
//
// The package name is normalized (separator runs collapsed to "-", then
// lowercased) and the package's JSON document is fetched. Each entry of
// versions is compared against the keys of the document's "releases" object
// using PEP 440 equality, so differently spelled but equal versions match.
//
// It returns nil when every version is found, a *MissingVersionsError listing
// the missing and known versions otherwise, or a plain error when the package
// document cannot be fetched or read. A requested version that does not parse
// as PEP 440 is reported as missing.
func versionsExistInPyPI(pkg string, versions []string) error {
	pkgNormalized := strings.ToLower(pypiNameSeparators.ReplaceAllString(pkg, "-"))
	packageInstanceURL := fmt.Sprintf("https://pypi.org/pypi/%s/json", pkgNormalized)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Get(packageInstanceURL)
	if err != nil {
		return fmt.Errorf("unable to validate package: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL)
	}

	respJSON, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve JSON for %q: %w", pkg, err)
	}

	// Collect every known release once: the raw strings for error reporting,
	// and the parsed forms for comparison. Known versions that do not parse
	// as PEP 440 can never match, so they are left out of the parsed list.
	versionsInPyPI := []string{}
	var parsedKnown []pep440.Version
	gjson.GetBytes(respJSON, "releases.@keys").ForEach(func(_, value gjson.Result) bool {
		known := value.String()
		versionsInPyPI = append(versionsInPyPI, known)
		if pv, err := pep440.Parse(known); err == nil {
			parsedKnown = append(parsedKnown, pv)
		}
		return true // keep iterating.
	})

	// Determine which referenced versions are missing.
	versionsMissing := []string{}
	for _, versionToCheckFor := range versions {
		vc, err := pep440.Parse(versionToCheckFor)
		if err != nil {
			versionsMissing = append(versionsMissing, versionToCheckFor)
			continue
		}
		found := slices.ContainsFunc(parsedKnown, func(pv pep440.Version) bool {
			return pv.Equal(vc)
		})
		if !found {
			versionsMissing = append(versionsMissing, versionToCheckFor)
		}
	}
	if len(versionsMissing) > 0 {
		return &MissingVersionsError{Package: pkg, Ecosystem: "PyPI", Missing: versionsMissing, Known: versionsInPyPI}
	}

	return nil
}

// existsInGo reports whether pkg exists as a Go module.
//
// The special names "stdlib" and "toolchain" always exist. For anything else
// it issues a HEAD request for the module's version list on the Go module
// proxy and returns true only when the response status is 200 OK. A request
// error is reported as false.
func existsInGo(pkg string) bool {
	// Of course the Go runtime exists :-)
	if pkg == "stdlib" || pkg == "toolchain" {
		return true
	}

	// The Go Module Proxy seems to require package names to be lowercase.
	// GitHub URLs are known to be case-insensitive.
	if strings.HasPrefix(pkg, "github.com/") {
		pkg = strings.ToLower(pkg)
	}

	packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Head(packageInstanceURL)
	if err != nil {
		return false
	}
	// Close the body so the underlying connection is released. Presumably resp
	// is an *http.Response, as with faulttolerant.Get elsewhere in this file.
	if resp.Body != nil {
		defer resp.Body.Close()
	}

	return resp.StatusCode == http.StatusOK
}

// versionsExistInGo confirms that all specified versions of a Go module exist.
//
// For "stdlib" and "toolchain" the check is delegated to goVersionsExist.
// Otherwise the module's version list is fetched from the Go module proxy and
// each entry of versions (given without the leading "v") is looked up in it
// in canonical semver form. Pseudo-versions are skipped rather than checked.
//
// It returns nil when every checked version is found or when the proxy lists
// no versions at all, a *MissingVersionsError listing the missing and known
// versions otherwise, or a plain error when the list cannot be fetched or read.
func versionsExistInGo(pkg string, versions []string) error {
	if pkg == "stdlib" || pkg == "toolchain" {
		return goVersionsExist(versions)
	}

	// The Go Module Proxy seems to require package names to be lowercase.
	// GitHub URLs are known to be case-insensitive.
	if strings.HasPrefix(pkg, "github.com/") {
		pkg = strings.ToLower(pkg)
	}

	packageInstanceURL := fmt.Sprintf("https://proxy.golang.org/%s/@v/list", pkg)

	// This 404's for non-existent packages.
	resp, err := faulttolerant.Get(packageInstanceURL)
	if err != nil {
		return fmt.Errorf("unable to validate package: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, packageInstanceURL)
	}

	respBytes, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve versions for %q: %w", pkg, err)
	}

	// The list is one version per line; Fields yields an empty slice for a
	// blank response and tolerates stray whitespace or "\r\n" line endings.
	versionsInGo := strings.Fields(string(respBytes))
	if len(versionsInGo) == 0 {
		// It seems that an empty version set is plausible. Unreleased?
		// e.g. github.com/nanobox-io/golang-nanoauth
		// TODO: This is warning-level worthy if warnings were a thing...
		return nil
	}

	// Determine which referenced versions are missing.
	versionsMissing := []string{}
	for _, versionToCheckFor := range versions {
		prefixed := "v" + versionToCheckFor
		// First, detect pseudo-version and skip it.
		if module.IsPseudoVersion(prefixed) {
			// TODO: Try mapping the pseudo-version to a base version and
			// checking for that instead of skipping.
			continue
		}
		if !slices.Contains(versionsInGo, semver.Canonical(prefixed)) {
			versionsMissing = append(versionsMissing, versionToCheckFor)
		}
	}
	if len(versionsMissing) > 0 {
		return &MissingVersionsError{Package: pkg, Ecosystem: "Go", Missing: versionsMissing, Known: versionsInGo}
	}

	return nil
}

// goVersionsExist confirms that all specified versions of Go itself exist.
//
// It fetches the release list from https://go.dev/dl/ in JSON form and looks
// for "go"+version among the "version" fields. A version whose semver
// prerelease is exactly "-0" is also accepted when either its major.minor
// form or its form with the "-0" suffix removed is a known release.
//
// It returns nil when every version is found, or an error naming the missing
// versions (or describing the fetch/read failure) otherwise.
func goVersionsExist(versions []string) error {
	URL := "https://go.dev/dl/?mode=json&include=all"

	resp, err := faulttolerant.Get(URL)
	if err != nil {
		return fmt.Errorf("unable to validate Go versions: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unable to validate package: %q for %s", resp.Status, URL)
	}

	respJSON, err := io.ReadAll(resp.Body)
	if err != nil {
		return fmt.Errorf("unable to retrieve JSON for Go: %w", err)
	}

	// Collect the "version" field of every release entry, e.g. "go1.21.0".
	goVersions := []string{}
	gjson.GetBytes(respJSON, "#.version").ForEach(func(_, value gjson.Result) bool {
		goVersions = append(goVersions, value.String())
		return true // keep iterating.
	})

	// Determine which referenced versions are missing.
	versionsMissing := []string{}
	for _, versionToCheckFor := range versions {
		if slices.Contains(goVersions, "go"+versionToCheckFor) {
			continue
		}
		prefixed := "v" + versionToCheckFor
		if pre := semver.Prerelease(prefixed); pre == "-0" {
			// Coerce "1.16.0-0" to "1.16".
			if slices.Contains(goVersions, "go"+strings.TrimPrefix(semver.MajorMinor(prefixed), "v")) {
				continue
			}
			// Coerce "1.21.0-0" to "1.21.0".
			if slices.Contains(goVersions, "go"+strings.TrimPrefix(strings.TrimSuffix(prefixed, pre), "v")) {
				continue
			}
		}
		versionsMissing = append(versionsMissing, versionToCheckFor)
	}
	if len(versionsMissing) > 0 {
		return fmt.Errorf("failed to find %+v for Go in %+v", versionsMissing, goVersions)
	}

	return nil
}
Loading
Loading