Skip to content

Commit

Permalink
Fix caching when remote scraping
Browse files: browse the repository at this point in the history
  • Loading branch information
Wikidepia committed Jun 19, 2024
1 parent 0438e60 commit 2e1fda5
Showing 1 changed file with 46 additions and 45 deletions.
91 changes: 46 additions & 45 deletions handlers/data/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func (i *InstaData) GetData(postID string) error {
}

// Scrape from remote scraper, if available
var bb []byte // marshaled data
if len(RemoteScraperAddr) > 0 {
req, res := fasthttp.AcquireRequest(), fasthttp.AcquireResponse()
defer func() {
Expand All @@ -81,66 +82,66 @@ func (i *InstaData) GetData(postID string) error {
req.Header.Set("Accept-Encoding", "gzip, deflate, br")
req.SetRequestURI(RemoteScraperAddr + "/scrape/" + postID)
if err := client.DoTimeout(req, res, timeout); err == nil && res.StatusCode() == fasthttp.StatusOK {
gzipBody, _ := res.BodyGunzip()
if err := binary.Unmarshal(gzipBody, i); err == nil {
bb, _ = res.BodyGunzip()
if err := binary.Unmarshal(bb, i); err == nil {
log.Info().Str("postID", postID).Msg("Data parsed from remote scraper")
return nil
}
}
}
} else {

data, err := getData(postID)
if err != nil {
if err != ErrNotFound {
log.Error().Str("postID", postID).Err(err).Msg("Failed to get data from Instagram")
} else {
log.Warn().Str("postID", postID).Err(err).Msg("Post not found; err getData")
data, err := getData(postID)
if err != nil {
if err != ErrNotFound {
log.Error().Str("postID", postID).Err(err).Msg("Failed to get data from Instagram")
} else {
log.Warn().Str("postID", postID).Err(err).Msg("Post not found; err getData")
}
return err
}
return err
}

item := data.Get("shortcode_media")
if !item.Exists() {
item = data.Get("xdt_shortcode_media")
item := data.Get("shortcode_media")
if !item.Exists() {
log.Error().Str("postID", postID).Msg("Failed to parse data from Instagram")
return ErrNotFound
item = data.Get("xdt_shortcode_media")
if !item.Exists() {
log.Error().Str("postID", postID).Msg("Failed to parse data from Instagram")
return ErrNotFound
}
}
}

media := []gjson.Result{item}
if item.Get("edge_sidecar_to_children").Exists() {
media = item.Get("edge_sidecar_to_children.edges").Array()
}
media := []gjson.Result{item}
if item.Get("edge_sidecar_to_children").Exists() {
media = item.Get("edge_sidecar_to_children.edges").Array()
}

i.PostID = utils.S2B(postID)
i.PostID = utils.S2B(postID)

// Get username
i.Username = []byte(item.Get("owner.username").String())
// Get username
i.Username = []byte(item.Get("owner.username").String())

// Get caption
i.Caption = bytes.TrimSpace([]byte(item.Get("edge_media_to_caption.edges.0.node.text").String()))
// Get caption
i.Caption = bytes.TrimSpace([]byte(item.Get("edge_media_to_caption.edges.0.node.text").String()))

// Get medias
i.Medias = make([]Media, 0, len(media))
for _, m := range media {
if m.Get("node").Exists() {
m = m.Get("node")
}
mediaURL := m.Get("video_url")
if !mediaURL.Exists() {
mediaURL = m.Get("display_url")
// Get medias
i.Medias = make([]Media, 0, len(media))
for _, m := range media {
if m.Get("node").Exists() {
m = m.Get("node")
}
mediaURL := m.Get("video_url")
if !mediaURL.Exists() {
mediaURL = m.Get("display_url")
}
i.Medias = append(i.Medias, Media{
TypeName: []byte(m.Get("__typename").String()),
URL: []byte(mediaURL.String()),
})
}
i.Medias = append(i.Medias, Media{
TypeName: []byte(m.Get("__typename").String()),
URL: []byte(mediaURL.String()),
})
}

bb, err := binary.Marshal(i)
if err != nil {
log.Error().Str("postID", postID).Err(err).Msg("Failed to marshal data")
return err
bb, err = binary.Marshal(i)
if err != nil {
log.Error().Str("postID", postID).Err(err).Msg("Failed to marshal data")
return err
}
}

batch := DB.NewBatch()
Expand Down

0 comments on commit 2e1fda5

Please sign in to comment.