diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index feefde9..df53209 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -48,7 +48,8 @@ jobs: - name: Upload assets to GHCR run: | + lowercase_repo=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') oras version oras push --artifact-type application/vnd.aquasec.trivy.config.v1+json \ - ghcr.io/${{ github.repository }}:${DB_VERSION} \ + ghcr.io/${lowercase_repo}:${DB_VERSION} \ javadb.tar.gz:application/vnd.aquasec.trivy.javadb.layer.v1.tar+gzip diff --git a/go.mod b/go.mod index 1560382..6bcc437 100644 --- a/go.mod +++ b/go.mod @@ -6,9 +6,10 @@ require ( github.com/PuerkitoBio/goquery v1.5.1 github.com/cheggaaa/pb/v3 v3.1.0 github.com/hashicorp/go-retryablehttp v0.7.2 + github.com/samber/lo v1.39.0 github.com/spf13/cobra v1.6.1 github.com/stretchr/testify v1.8.1 - golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 + golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 k8s.io/utils v0.0.0-20230115233650-391b47cb4029 modernc.org/sqlite v1.20.3 @@ -31,10 +32,11 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - golang.org/x/mod v0.3.0 // indirect - golang.org/x/net v0.0.0-20201021035429-f5854403a974 // indirect + golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect + golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57 // indirect + golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f // indirect golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect - golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 // indirect + golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/uint128 v1.2.0 // indirect modernc.org/cc/v3 v3.40.0 // indirect diff --git a/go.sum b/go.sum index 91ffddd..c8600cb 
100644 --- a/go.sum +++ b/go.sum @@ -45,6 +45,8 @@ github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJ github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= +github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -57,38 +59,26 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.3.0 h1:RM4zey1++hCTbCVQfnWeKs9/IEsaBLA8vTkd0WVtmH4= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= +golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= +golang.org/x/mod 
v0.6.0-dev.0.20211013180041-c96bc1413d57 h1:LQmS1nU0twXLA96Kt7U9qtHJEbBk3z6Q0V4UXjZkpr4= +golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 h1:SQFwaSi55rU7vdNs9Yr0Z324VNlrF+0wMqRXT4St8ck= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f h1:OfiFi4JbukWwe3lzw+xunroH1mnC1e2Gy5cxNJApiSY= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= +golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab h1:2QkjZIsXupsJbJIdSjjUOgWK3aEtzyuh2mPt3l/CkeU= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 h1:M8tBwCtWD/cZV9DZpFYRUgaymAYAr+aIUTWzDaM3uPs= -golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023 h1:0c3L82FDQ5rt1bjTBlchS8t6RQ6299/+5bWMnRLh+uI= +golang.org/x/tools v0.1.8-0.20211029000441-d6a9af8af023/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= diff --git a/pkg/crawler/crawler.go b/pkg/crawler/crawler.go index 
4573505..682ddb4 100644 --- a/pkg/crawler/crawler.go +++ b/pkg/crawler/crawler.go @@ -1,24 +1,26 @@ package crawler import ( + "bytes" "context" "encoding/hex" "encoding/xml" "fmt" - "github.com/aquasecurity/trivy-java-db/pkg/fileutil" - "github.com/aquasecurity/trivy-java-db/pkg/types" "io" "log" "net/http" - "net/url" "path/filepath" "strings" "sync" "github.com/PuerkitoBio/goquery" "github.com/hashicorp/go-retryablehttp" + "github.com/samber/lo" "golang.org/x/sync/semaphore" "golang.org/x/xerrors" + + "github.com/aquasecurity/trivy-java-db/pkg/fileutil" + "github.com/aquasecurity/trivy-java-db/pkg/types" ) const mavenRepoURL = "https://repo.maven.apache.org/maven2/" @@ -27,10 +29,11 @@ type Crawler struct { dir string http *retryablehttp.Client - rootUrl string - wg sync.WaitGroup - urlCh chan string - limit *semaphore.Weighted + rootUrl string + wg sync.WaitGroup + urlCh chan string + limit *semaphore.Weighted + wrongSHA1Values []string } type Option struct { @@ -118,6 +121,12 @@ loop: } } log.Println("Crawl completed") + if len(c.wrongSHA1Values) > 0 { + log.Println("Wrong sha1 files:") + for _, wrongSHA1 := range c.wrongSHA1Values { + log.Println(wrongSHA1) + } + } return nil } @@ -140,7 +149,7 @@ func (c *Crawler) Visit(ctx context.Context, url string) error { var children []string var foundMetadata bool d.Find("a").Each(func(i int, selection *goquery.Selection) { - link := selection.Text() + link := linkFromSelection(selection) if link == "maven-metadata.xml" { foundMetadata = true return @@ -148,7 +157,6 @@ func (c *Crawler) Visit(ctx context.Context, url string) error { // only `../` and dirs have `/` suffix. We don't need to check other files. 
return } - children = append(children, link) }) @@ -158,7 +166,7 @@ func (c *Crawler) Visit(ctx context.Context, url string) error { return xerrors.Errorf("metadata parse error: %w", err) } if meta != nil { - if err = c.crawlSHA1(ctx, url, meta); err != nil { + if err = c.crawlSHA1(ctx, url, meta, children); err != nil { return err } // Return here since there is no need to crawl dirs anymore. @@ -183,34 +191,63 @@ func (c *Crawler) Visit(ctx context.Context, url string) error { return nil } -func (c *Crawler) crawlSHA1(ctx context.Context, baseURL string, meta *Metadata) error { - var versions []Version - for _, version := range meta.Versioning.Versions { - // some metadata may contain characters that require escaping - // for example 1.0.7?: - // https://repo.maven.apache.org/maven2/io/github/visal-99/b24paysdk/maven-metadata.xml - version = url.QueryEscape(version) - sha1FileName := fmt.Sprintf("/%s-%s.jar.sha1", url.QueryEscape(meta.ArtifactID), version) - sha1, err := c.fetchSHA1(ctx, baseURL+version+sha1FileName) +func (c *Crawler) crawlSHA1(ctx context.Context, baseURL string, meta *Metadata, dirs []string) error { + var foundVersions []Version + // Check each version dir to find links to `*.jar.sha1` files. + for _, dir := range dirs { + dirURL := baseURL + dir + sha1Urls, err := c.sha1Urls(ctx, dirURL) if err != nil { - return err + return xerrors.Errorf("unable to get list of sha1 files from %q: %s", dirURL, err) } - if len(sha1) != 0 { - v := Version{ - Version: version, - SHA1: sha1, + + // Remove the `/` suffix to correctly compare file versions with version from directory name. 
+ dirVersion := strings.TrimSuffix(dir, "/") + var dirVersionSha1 []byte + var versions []Version + for _, sha1Url := range sha1Urls { + sha1, err := c.fetchSHA1(ctx, sha1Url) + if err != nil { + return xerrors.Errorf("unable to fetch sha1: %s", err) } - versions = append(versions, v) + if ver := versionFromSha1URL(meta.ArtifactID, sha1Url); ver != "" && len(sha1) != 0 { + // Save sha1 for the file where the version is equal to the version from the directory name in order to remove duplicates later + // Avoid overwriting dirVersion when inserting versions into the database (sha1 is uniq blob) + // e.g. `cudf-0.14-cuda10-1.jar.sha1` should not overwrite `cudf-0.14.jar.sha1` + // https://repo.maven.apache.org/maven2/ai/rapids/cudf/0.14/ + if ver == dirVersion { + dirVersionSha1 = sha1 + } else { + versions = append(versions, Version{ + Version: ver, + SHA1: sha1, + }) + } + } + } + // Remove duplicates of dirVersionSha1 + versions = lo.Filter(versions, func(v Version, _ int) bool { + return !bytes.Equal(v.SHA1, dirVersionSha1) + }) + + if dirVersionSha1 != nil { + versions = append(versions, Version{ + Version: dirVersion, + SHA1: dirVersionSha1, + }) } + + foundVersions = append(foundVersions, versions...) 
} - if len(versions) == 0 { + + if len(foundVersions) == 0 { return nil } index := &Index{ GroupID: meta.GroupID, ArtifactID: meta.ArtifactID, - Versions: versions, + Versions: foundVersions, ArchiveType: types.JarType, } fileName := fmt.Sprintf("%s.json", index.ArtifactID) @@ -221,7 +258,45 @@ func (c *Crawler) crawlSHA1(ctx context.Context, baseURL string, meta *Metadata) return nil } +func (c *Crawler) sha1Urls(ctx context.Context, url string) ([]string, error) { + req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, xerrors.Errorf("unable to new HTTP request: %w", err) + } + resp, err := c.http.Do(req) + if err != nil { + return nil, xerrors.Errorf("http get error (%s): %w", url, err) + } + defer func() { _ = resp.Body.Close() }() + + d, err := goquery.NewDocumentFromReader(resp.Body) + if err != nil { + return nil, xerrors.Errorf("can't create new goquery doc: %w", err) + } + + // Version dir may contain multiple `*jar.sha1` files. + // e.g. https://repo1.maven.org/maven2/org/jasypt/jasypt/1.9.3/ + // We need to take all links. + var sha1URLs []string + d.Find("a").Each(func(i int, selection *goquery.Selection) { + link := linkFromSelection(selection) + // Don't include sources, test, javadocs, scaladoc files + if strings.HasSuffix(link, ".jar.sha1") && !strings.HasSuffix(link, "sources.jar.sha1") && + !strings.HasSuffix(link, "test.jar.sha1") && !strings.HasSuffix(link, "tests.jar.sha1") && + !strings.HasSuffix(link, "javadoc.jar.sha1") && !strings.HasSuffix(link, "scaladoc.jar.sha1") { + sha1URLs = append(sha1URLs, url+link) + } + }) + return sha1URLs, nil +} + func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, error) { + // We need to skip metadata.xml files from groupID folder + // e.g. 
https://repo.maven.apache.org/maven2/args4j/maven-metadata.xml + if len(strings.Split(url, "/")) < 7 { + return nil, nil + } + req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, xerrors.Errorf("unable to new HTTP request: %w", err) @@ -236,16 +311,17 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err if err = xml.NewDecoder(resp.Body).Decode(&meta); err != nil { return nil, xerrors.Errorf("%s decode error: %w", url, err) } + // Skip metadata without `GroupID` and ArtifactID` fields + // e.g. https://repo.maven.apache.org/maven2/at/molindo/maven-metadata.xml + if meta.ArtifactID == "" || meta.GroupID == "" { + return nil, nil + } + // we don't need metadata.xml files from version folder // e.g. https://repo.maven.apache.org/maven2/HTTPClient/HTTPClient/0.3-3/maven-metadata.xml if len(meta.Versioning.Versions) == 0 { return nil, nil } - // also we need to skip metadata.xml files from groupID folder - // e.g. https://repo.maven.apache.org/maven2/args4j/maven-metadata.xml - if len(strings.Split(url, "/")) < 7 { - return nil, nil - } return &meta, nil } @@ -258,9 +334,11 @@ func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) { if err != nil { return nil, xerrors.Errorf("http get error (%s): %w", url, err) } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() - // some projects don't have xxx.jar and xxx.jar.sha1 files + // These are cases when version dir contains link to sha1 file + // But file doesn't exist + // e.g. 
https://repo.maven.apache.org/maven2/com/adobe/aem/uber-jar/6.4.8.2/uber-jar-6.4.8.2-sources.jar.sha1 if resp.StatusCode == http.StatusNotFound { return nil, nil // TODO add special error for this } @@ -287,7 +365,33 @@ func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) { } } if len(sha1b) == 0 { - return nil, xerrors.Errorf("failed to decode sha1 %s: %w", url, err) + c.wrongSHA1Values = append(c.wrongSHA1Values, fmt.Sprintf("%s (%s)", url, err)) + return nil, nil } return sha1b, nil } + +func versionFromSha1URL(artifactId, sha1URL string) string { + ss := strings.Split(sha1URL, "/") + fileName := ss[len(ss)-1] + if !strings.HasPrefix(fileName, artifactId) { + return "" + } + return strings.TrimSuffix(strings.TrimPrefix(fileName, artifactId+"-"), ".jar.sha1") +} + +// linkFromSelection returns the link from goquery.Selection. +// There are times when maven breaks `text` - it removes part of the `text` and adds the suffix `...` (`.../` for dirs). +// e.g. `v1.1.0-226-g847ecff2d8e26f249422247d7665fe15.../` +// In this case we should take `href`. +// But we don't need to get `href` if the text isn't broken. +// To avoid checking unnecessary links. +// e.g. `
../`
+func linkFromSelection(selection *goquery.Selection) string {
+	text := selection.Text()
+	truncated := strings.HasSuffix(text, ".../") || strings.HasSuffix(text, "...") // `.../` marks a shortened dir name, `...` a shortened file name
+	if href, hasHref := selection.Attr("href"); hasHref && truncated {
+		// The visible text was cut short, so the `href` attribute is returned in its place.
+		return href
+	}
+	return text
+}
diff --git a/pkg/crawler/crawler_test.go b/pkg/crawler/crawler_test.go
index 46a5d40..5cd9fee 100644
--- a/pkg/crawler/crawler_test.go
+++ b/pkg/crawler/crawler_test.go
@@ -22,13 +22,18 @@ func TestCrawl(t *testing.T) {
 		{
 			name: "happy path",
 			fileNames: map[string]string{
-				"/maven2/":                               "testdata/index.html",
-				"/maven2/abbot/":                         "testdata/abbot.html",
-				"/maven2/abbot/abbot/":                   "testdata/abbot_abbot.html",
-				"/maven2/abbot/abbot/maven-metadata.xml": "testdata/maven-metadata.xml",
-				"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/abbot-0.12.3.jar.sha1",
-				"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/abbot-0.13.0.jar.sha1",
-				"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1":   "testdata/abbot-1.4.0.jar.sha1",
+				"/maven2/":                                              "testdata/index.html",
+				"/maven2/abbot/":                                        "testdata/abbot.html",
+				"/maven2/abbot/abbot/":                                  "testdata/abbot_abbot.html",
+				"/maven2/abbot/abbot/maven-metadata.xml":                "testdata/maven-metadata.xml",
+				"/maven2/abbot/abbot/0.12.3/":                           "testdata/abbot_abbot_0.12.3.html",
+				"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1":      "testdata/abbot-0.12.3.jar.sha1",
+				"/maven2/abbot/abbot/0.13.0/":                           "testdata/abbot_abbot_0.13.0.html",
+				"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1":      "testdata/abbot-0.13.0.jar.sha1",
+				"/maven2/abbot/abbot/0.13.0/abbot-0.13.0-copy.jar.sha1": "testdata/abbot-0.13.0-copy.jar.sha1",
+				"/maven2/abbot/abbot/1.4.0/":                            "testdata/abbot_abbot_1.4.0.html",
+				"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1":        "testdata/abbot-1.4.0.jar.sha1",
+				"/maven2/abbot/abbot/1.4.0/abbot-1.4.0-lite.jar.sha1":   "testdata/abbot-1.4.0-lite.jar.sha1",
 			},
 			goldenPath: "testdata/golden/abbot.json",
 			filePath:   "indexes/abbot/abbot.json",
diff --git a/pkg/crawler/testdata/abbot-0.13.0-copy.jar.sha1 b/pkg/crawler/testdata/abbot-0.13.0-copy.jar.sha1
new file mode 100644
index 0000000..ad11b66
--- /dev/null
+++ b/pkg/crawler/testdata/abbot-0.13.0-copy.jar.sha1
@@ -0,0 +1 @@
+596d91e67631b0deb05fb685d8d1b6735f3e4f60
\ No newline at end of file
diff --git a/pkg/crawler/testdata/abbot-1.4.0-lite.jar.sha1 b/pkg/crawler/testdata/abbot-1.4.0-lite.jar.sha1
new file mode 100644
index 0000000..72d44f4
--- /dev/null
+++ b/pkg/crawler/testdata/abbot-1.4.0-lite.jar.sha1
@@ -0,0 +1 @@
+0547ab037068afa2026925bd94bfb9fcfcec9761
\ No newline at end of file
diff --git a/pkg/crawler/testdata/abbot.html b/pkg/crawler/testdata/abbot.html
index 3f5e5a7..e6c2f99 100644
--- a/pkg/crawler/testdata/abbot.html
+++ b/pkg/crawler/testdata/abbot.html
@@ -16,8 +16,9 @@ 

abbot


-
../
-abbot/                                                           -         -       			-         -
+		
+../
+abbot/                                                           -         -
 		

diff --git a/pkg/crawler/testdata/abbot_abbot.html b/pkg/crawler/testdata/abbot_abbot.html index e4d3d83..ecba60b 100644 --- a/pkg/crawler/testdata/abbot_abbot.html +++ b/pkg/crawler/testdata/abbot_abbot.html @@ -17,12 +17,14 @@

abbot/abbot


../
-0.12.3/                                           2005-09-20 05:44         -      
-0.13.0/                                           2005-09-20 05:44         -      
-1.4.0/                                            2015-09-22 16:03         -      
-maven-metadata.xml                                2015-09-24 14:18       402      
-maven-metadata.xml.md5                            2015-09-24 14:18        32      
-maven-metadata.xml.sha1                           2015-09-24 14:18        40      
+../
+0.12..../                                           2005-09-20 05:44         -
+0.13.0/                                           2005-09-20 05:44         -
+1.4.0/                                            2015-09-22 16:03         -
+1.5.0/                                            2015-09-22 16:03         -
+maven-metadata.xml                                2015-09-24 14:18       402
+maven-metadata.xml.md5                            2015-09-24 14:18        32
+maven-metadata.xml.sha1                           2015-09-24 14:18        40
 		

diff --git a/pkg/crawler/testdata/abbot_abbot_0.12.3.html b/pkg/crawler/testdata/abbot_abbot_0.12.3.html index e6ef130..805d496 100644 --- a/pkg/crawler/testdata/abbot_abbot_0.12.3.html +++ b/pkg/crawler/testdata/abbot_abbot_0.12.3.html @@ -17,15 +17,15 @@

abbot/abbot/0.12.3


../
-abbot-0.12.3.jar                                  2005-09-20 05:44    689791      
-abbot-0.12.3.jar.md5                              2005-09-20 05:44        32      
-abbot-0.12.3.jar.sha1                             2005-09-20 05:44        40      
-abbot-0.12.3.pom                                  2005-09-20 05:44       166      
-abbot-0.12.3.pom.md5                              2005-09-20 05:44       128      
-abbot-0.12.3.pom.sha1                             2005-09-20 05:44       136      
-maven-metadata.xml                                2005-09-20 05:44       110      
-maven-metadata.xml.md5                            2005-09-20 05:44        74      
-maven-metadata.xml.sha1                           2005-09-20 05:44       129      
+abbot-0.12.3.jar                                  2005-09-20 05:44    689791
+abbot-0.12.3.jar.md5                              2005-09-20 05:44        32
+abbot-0.12.3.j...                             2005-09-20 05:44        40
+abbot-0.12.3.pom                                  2005-09-20 05:44       166
+abbot-0.12.3.pom.md5                              2005-09-20 05:44       128
+abbot-0.12.3.pom.sha1                             2005-09-20 05:44       136
+maven-metadata.xml                                2005-09-20 05:44       110
+maven-metadata.xml.md5                            2005-09-20 05:44        74
+maven-metadata.xml.sha1                           2005-09-20 05:44       129
 		

diff --git a/pkg/crawler/testdata/abbot_abbot_0.13.0.html b/pkg/crawler/testdata/abbot_abbot_0.13.0.html index a76058c..b5661f0 100644 --- a/pkg/crawler/testdata/abbot_abbot_0.13.0.html +++ b/pkg/crawler/testdata/abbot_abbot_0.13.0.html @@ -17,15 +17,18 @@

abbot/abbot/0.13.0


../
-abbot-0.13.0.jar                                  2005-09-20 05:44    779426      
-abbot-0.13.0.jar.md5                              2005-09-20 05:44        32      
-abbot-0.13.0.jar.sha1                             2005-09-20 05:44        40      
-abbot-0.13.0.pom                                  2005-09-20 05:44       166      
-abbot-0.13.0.pom.md5                              2005-09-20 05:44       128      
-abbot-0.13.0.pom.sha1                             2005-09-20 05:44       136      
-maven-metadata.xml                                2005-09-20 05:44       110      
-maven-metadata.xml.md5                            2005-09-20 05:44        74      
-maven-metadata.xml.sha1                           2005-09-20 05:44       129      
+abbot-0.13.0.jar                                  2005-09-20 05:44    779426
+abbot-0.13.0.jar.md5                              2005-09-20 05:44        32
+abbot-0.13.0.jar.sha1                             2005-09-20 05:44        40
+abbot-0.13.0.jar                                  2005-09-20 05:44    779426      
+abbot-0.13.0.jar.md5                              2005-09-20 05:44        32      
+abbot-0.13.0.jar.sha1                             2005-09-20 05:44        40      
+abbot-0.13.0.pom                                  2005-09-20 05:44       166      
+abbot-0.13.0.pom.md5                              2005-09-20 05:44       128      
+abbot-0.13.0.pom.sha1                             2005-09-20 05:44       136      
+maven-metadata.xml                                2005-09-20 05:44       110      
+maven-metadata.xml.md5                            2005-09-20 05:44        74      
+maven-metadata.xml.sha1                           2005-09-20 05:44       129      
 		

diff --git a/pkg/crawler/testdata/abbot_abbot_1.4.0.html b/pkg/crawler/testdata/abbot_abbot_1.4.0.html index 081048f..360eac0 100644 --- a/pkg/crawler/testdata/abbot_abbot_1.4.0.html +++ b/pkg/crawler/testdata/abbot_abbot_1.4.0.html @@ -17,30 +17,29 @@

abbot/abbot/1.4.0


../
-abbot-1.4.0-javadoc.jar                           2015-09-22 16:03   1404960      
-abbot-1.4.0-javadoc.jar.asc                       2015-09-22 16:03       490      
-abbot-1.4.0-javadoc.jar.asc.md5                   2015-09-22 16:03        32      
-abbot-1.4.0-javadoc.jar.asc.sha1                  2015-09-22 16:03        40      
-abbot-1.4.0-javadoc.jar.md5                       2015-09-22 16:03        32      
-abbot-1.4.0-javadoc.jar.sha1                      2015-09-22 16:03        40      
-abbot-1.4.0-sources.jar                           2015-09-22 16:03    310023      
-abbot-1.4.0-sources.jar.asc                       2015-09-22 16:03       490      
-abbot-1.4.0-sources.jar.asc.md5                   2015-09-22 16:03        32      
-abbot-1.4.0-sources.jar.asc.sha1                  2015-09-22 16:03        40      
-abbot-1.4.0-sources.jar.md5                       2015-09-22 16:03        32      
-abbot-1.4.0-sources.jar.sha1                      2015-09-22 16:03        40      
-abbot-1.4.0.jar                                   2015-09-22 16:03    687192      
-abbot-1.4.0.jar.asc                               2015-09-22 16:03       490      
-abbot-1.4.0.jar.asc.md5                           2015-09-22 16:03        32      
-abbot-1.4.0.jar.asc.sha1                          2015-09-22 16:03        40      
-abbot-1.4.0.jar.md5                               2015-09-22 16:03        32      
-abbot-1.4.0.jar.sha1                              2015-09-22 16:03        40      
-abbot-1.4.0.pom                                   2015-09-22 16:03      1292      
-abbot-1.4.0.pom.asc                               2015-09-22 16:03       490      
-abbot-1.4.0.pom.asc.md5                           2015-09-22 16:03        32      
-abbot-1.4.0.pom.asc.sha1                          2015-09-22 16:03        40      
-abbot-1.4.0.pom.md5                               2015-09-22 16:03        32      
-abbot-1.4.0.pom.sha1                              2015-09-22 16:03        40      
+			jasypt-1.9.3-javadoc.jar                          2019-05-25 16:34    748409
+abbot-1.4.0-lite.jar                             2019-05-25 16:34     74953
+abbot-1.4.0-lite.jar.asc                         2019-05-25 16:34       516
+abbot-1.4.0-lite.jar.md5                         2019-05-25 16:34        32
+abbot-1.4.0-lite.jar.sha1                    2019-05-25 16:34        40
+abbot-1.4.0-sources.jar                           2015-09-22 16:03    310023      
+abbot-1.4.0-sources.jar.asc                       2015-09-22 16:03       490      
+abbot-1.4.0-sources.jar.asc.md5                   2015-09-22 16:03        32      
+abbot-1.4.0-sources.jar.asc.sha1                  2015-09-22 16:03        40      
+abbot-1.4.0-sources.jar.md5                       2015-09-22 16:03        32      
+abbot-1.4.0-sources.jar.sha1                      2015-09-22 16:03        40      
+abbot-1.4.0.jar                                   2015-09-22 16:03    687192      
+abbot-1.4.0.jar.asc                               2015-09-22 16:03       490      
+abbot-1.4.0.jar.asc.md5                           2015-09-22 16:03        32      
+abbot-1.4.0.jar.asc.sha1                          2015-09-22 16:03        40      
+abbot-1.4.0.jar.md5                               2015-09-22 16:03        32      
+abbot-1.4.0.jar.sha1                              2015-09-22 16:03        40      
+abbot-1.4.0.pom                                   2015-09-22 16:03      1292      
+abbot-1.4.0.pom.asc                               2015-09-22 16:03       490      
+abbot-1.4.0.pom.asc.md5                           2015-09-22 16:03        32      
+abbot-1.4.0.pom.asc.sha1                          2015-09-22 16:03        40      
+abbot-1.4.0.pom.md5                               2015-09-22 16:03        32      
+abbot-1.4.0.pom.sha1                              2015-09-22 16:03        40      
 		

diff --git a/pkg/crawler/testdata/golden/abbot.json b/pkg/crawler/testdata/golden/abbot.json index 16130ad..89791ee 100644 --- a/pkg/crawler/testdata/golden/abbot.json +++ b/pkg/crawler/testdata/golden/abbot.json @@ -10,6 +10,10 @@ "Version": "0.13.0", "SHA1": "WW2R5nYxsN6wX7aF2NG2c18+T2A=" }, + { + "Version": "1.4.0-lite", + "SHA1": "BUerA3Bor6ICaSW9lL+5/Pzsl2E=" + }, { "Version": "1.4.0", "SHA1": "ojY2RqndBZVWM7RQAQtZohr4pCM=" diff --git a/pkg/crawler/testdata/maven-metadata.xml b/pkg/crawler/testdata/maven-metadata.xml index 51ec1ab..8ef2187 100644 --- a/pkg/crawler/testdata/maven-metadata.xml +++ b/pkg/crawler/testdata/maven-metadata.xml @@ -7,7 +7,6 @@ 1.4.0 0.12.3 - 0.13.0 1.4.0 20150924141841