Skip to content

Commit

Permalink
fix: use title to check sha1 file names
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriyLewen committed Mar 6, 2024
1 parent 4b4ea82 commit 3ed9e32
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
12 changes: 8 additions & 4 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,10 +256,14 @@ func (c *Crawler) sha1Urls(ctx context.Context, url string) ([]string, error) {
// We need to take all links.
var sha1URLs []string
d.Find("a").Each(func(i int, selection *goquery.Selection) {
link := selection.Text()
if strings.HasSuffix(link, ".jar.sha1") && !strings.HasSuffix(link, "sources.jar.sha1") &&
!strings.HasSuffix(link, "test.jar.sha1") && !strings.HasSuffix(link, "javadoc.jar.sha1") {
sha1URLs = append(sha1URLs, url+link)
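// The index page truncates long file names in the link text (e.g. `abbot-0.12.3.j...`),
// so the visible text may not end in `.jar.sha1` even when the file does.
// e.g. https://repo.maven.apache.org/maven2/africa/absa/inception-oauth2-resource-server/1.0.0/
// The `title` attribute carries the full file name, so we match against it instead.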
if fileName, ok := selection.Attr("title"); ok {
if strings.HasSuffix(fileName, ".jar.sha1") && !strings.HasSuffix(fileName, "sources.jar.sha1") &&
!strings.HasSuffix(fileName, "test.jar.sha1") && !strings.HasSuffix(fileName, "javadoc.jar.sha1") {
sha1URLs = append(sha1URLs, url+fileName)
}
}
})
return sha1URLs, nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/crawler/testdata/abbot_abbot_0.12.3.html
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ <h1>abbot/abbot/0.12.3</h1>
<pre id="contents"><a href="https://repo.maven.apache.org/maven2/abbot/abbot/">../</a>
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar" title="abbot-0.12.3.jar">abbot-0.12.3.jar</a> 2005-09-20 05:44 689791
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.md5" title="abbot-0.12.3.jar.md5">abbot-0.12.3.jar.md5</a> 2005-09-20 05:44 32
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1" title="abbot-0.12.3.jar.sha1">abbot-0.12.3.jar.sha1</a> 2005-09-20 05:44 40
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1" title="abbot-0.12.3.jar.sha1">abbot-0.12.3.j...</a> 2005-09-20 05:44 40
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.pom" title="abbot-0.12.3.pom">abbot-0.12.3.pom</a> 2005-09-20 05:44 166
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.pom.md5" title="abbot-0.12.3.pom.md5">abbot-0.12.3.pom.md5</a> 2005-09-20 05:44 128
<a href="https://repo.maven.apache.org/maven2/abbot/abbot/0.12.3/abbot-0.12.3.pom.sha1" title="abbot-0.12.3.pom.sha1">abbot-0.12.3.pom.sha1</a> 2005-09-20 05:44 136
Expand Down

0 comments on commit 3ed9e32

Please sign in to comment.