Skip to content

Commit

Permalink
fix(crawl): skip 404 errors (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriyLewen authored May 22, 2024
1 parent ab39d06 commit 349526b
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ func (c *Crawler) Visit(ctx context.Context, url string) error {
}
defer resp.Body.Close()

// Skip any response that is not 200 OK: some URLs don't exist,
// e.g. https://repo.maven.apache.org/maven2/io/springboot/ai/spring-ai-anthropic/
if resp.StatusCode != http.StatusOK {
return nil
}

d, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
return xerrors.Errorf("can't create new goquery doc: %w", err)
Expand Down Expand Up @@ -307,6 +313,12 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err
}
defer resp.Body.Close()

// Skip any response that is not 200 OK: the maven-metadata.xml file doesn't always exist,
// e.g. https://repo.maven.apache.org/maven2/io/springboot/ai/spring-ai-vertex-ai-gemini-spring-boot-starter/maven-metadata.xml
if resp.StatusCode != http.StatusOK {
return nil, nil
}

var meta Metadata
if err = xml.NewDecoder(resp.Body).Decode(&meta); err != nil {
return nil, xerrors.Errorf("%s decode error: %w", url, err)
Expand Down

0 comments on commit 349526b

Please sign in to comment.