Skip to content

Commit

Permalink
refactor: use text for maven-metadata.xml
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriyLewen committed Mar 7, 2024
1 parent c42ce41 commit 9eacca0
Showing 1 changed file with 5 additions and 11 deletions.
16 changes: 5 additions & 11 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"golang.org/x/xerrors"
)

const mavenRepoURL = "https://repo.maven.apache.org/maven2/"
const mavenRepoURL = "https://repo.maven.apache.org/maven2/xstream/xstream/"

type Crawler struct {
dir string
Expand Down Expand Up @@ -146,22 +146,16 @@ func (c *Crawler) Visit(ctx context.Context, url string) error {
var children []string
var foundMetadata bool
d.Find("a").Each(func(i int, selection *goquery.Selection) {
fileName := selection.Text()
// Find `maven-metadata.xml` to get artifact URL and determine the ArtifactID and GroupID
if fileName == "maven-metadata.xml" {
link := selection.Text()
if link == "maven-metadata.xml" {
foundMetadata = true
return
} else if fileName == "../" || !strings.HasSuffix(fileName, "/") {
} else if link == "../" || !strings.HasSuffix(link, "/") {
// Only `../` and directories have a `/` suffix; skip the parent link and regular files.
return
}

// There are times when the dir name is very long.
// e.g. https://repo.maven.apache.org/maven2/ai/grakn/grakn-bootup/
// We need to use `href` to make sure we use the correct dir name
if dir, ok := selection.Attr("href"); ok {
children = append(children, dir)
}
children = append(children, link)
})

if foundMetadata {
Expand Down

0 comments on commit 9eacca0

Please sign in to comment.