Skip to content

Commit

Permalink
Better error handling when scraping data fails
Browse files: browse the repository at this point in the history
  • Loading branch information
Wikidepia committed Jul 8, 2024
1 parent 277dc44 commit 10c5674
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
2 changes: 0 additions & 2 deletions handlers/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
  "strings"

  "github.com/gofiber/fiber/v2"
- "github.com/rs/zerolog/log"
  "github.com/valyala/bytebufferpool"
  )

Expand Down Expand Up @@ -96,7 +95,6 @@ func Embed() fiber.Handler {
  views.Embed(viewsData, viewsBuf)
  return c.Send(viewsBuf.Bytes())
  } else if len(item.Username) == 0 {
- log.Warn().Str("postID", postID).Msg("Post not found; empty username")
  viewsData.Description = "Post not found"
  views.Embed(viewsData, viewsBuf)
  return c.Send(viewsBuf.Bytes())
Expand Down
23 changes: 13 additions & 10 deletions handlers/scraper/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ import (
  "golang.org/x/sync/singleflight"
  )

- var gjsonNil = gjson.Result{}
-
  var client = &fasthttp.Client{
  Dial: fasthttpproxy.FasthttpProxyHTTPDialerTimeout(5 * time.Second),
  ReadBufferSize: 16 * 1024,
Expand Down Expand Up @@ -79,12 +77,8 @@ func GetData(postID string) (*InstaData, error) {
  item := new(InstaData)
  item.PostID = postID
  if err := item.ScrapeData(); err != nil {
- if err != ErrNotFound {
- log.Error().Str("postID", item.PostID).Err(err).Msg("Failed to get data from Instagram")
- } else {
- log.Warn().Str("postID", item.PostID).Err(err).Msg("Post not found")
- }
- return item, err
+ log.Error().Str("postID", item.PostID).Err(err).Msg("Failed to scrape data from Instagram")
+ return nil, err
  }

  // Replace all media urls cdn to scontent.cdninstagram.com
Expand Down Expand Up @@ -152,6 +146,7 @@ func (i *InstaData) ScrapeData() error {
  return nil
  }
  }
+ log.Warn().Str("postID", i.PostID).Msg("Failed to scrape data from remote scraper")
  }

  req.Reset()
Expand Down Expand Up @@ -227,18 +222,26 @@ func (i *InstaData) ScrapeData() error {
  return err
  }
  gqlData := gjson.Parse(utils.B2S(gqlValue))
+ // Need to show embeds even if the video is blocked
+ if gqlData.Get("require_login").Bool() && !videoBlocked {
+ return errors.New("scrapeFromGQL is blocked")
+ }
  if gqlData.Get("data").Exists() {
  log.Info().Str("postID", i.PostID).Msg("Data scraped from scrapeFromGQL")
  gqlData = gqlData.Get("data")
  }
  }

+ status := gqlData.Get("status").String()
  item := gqlData.Get("shortcode_media")
  if !item.Exists() {
  item = gqlData.Get("xdt_shortcode_media")
  if !item.Exists() {
- log.Error().Str("postID", i.PostID).Msg("Failed to parse data from Instagram")
- return ErrNotFound
+ if status == "ok" {
+ return ErrNotFound
+ } else if status == "fail" {
+ return errors.New("scrapeFromGQL is blocked")
+ }
  }
  }

Expand Down

0 comments on commit 10c5674

Please sign in to comment.