Skip to content

Commit

Permalink
Decompress remote scraper with zstd+dict
Browse files Browse the repository at this point in the history
  • Loading branch information
Wikidepia committed Jul 9, 2024
1 parent 88c40e1 commit 0e1199b
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ require (
github.com/elastic/go-freelru v0.13.0
github.com/gofiber/fiber/v2 v2.52.4
github.com/kelindar/binary v1.0.19
github.com/klauspost/compress v1.17.9
github.com/rs/zerolog v1.33.0
github.com/tdewolff/parse/v2 v2.7.15
github.com/tidwall/gjson v1.17.1
Expand All @@ -39,7 +40,6 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/snappy v0.0.4 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
Expand Down
12 changes: 10 additions & 2 deletions handlers/scraper/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package handlers

import (
"bytes"
_ "embed"
"errors"
"instafix/utils"
"net/url"
Expand All @@ -13,6 +14,7 @@ import (
"github.com/PurpleSec/escape"
"github.com/cockroachdb/pebble"
"github.com/kelindar/binary"
"github.com/klauspost/compress/zstd"
"github.com/rs/zerolog/log"
"github.com/tdewolff/parse/v2"
"github.com/tdewolff/parse/v2/js"
Expand All @@ -39,6 +41,9 @@ var RemoteScraperAddr string

// sflightScraper is a package-level singleflight.Group.
// NOTE(review): presumably used to collapse concurrent scrapes of the same
// post into a single in-flight call; its call sites are outside this view — confirm.
var sflightScraper singleflight.Group

// zstdDict holds the raw bytes of the zstd.dict file, embedded into the binary
// at build time. ScrapeData passes it to the zstd decoder as the decompression
// dictionary for remote scraper responses.
//
//go:embed zstd.dict
var zstdDict []byte

type Media struct {
TypeName string
URL string
Expand Down Expand Up @@ -141,9 +146,12 @@ func (i *InstaData) ScrapeData() error {
req.Header.Set("Accept-Encoding", "gzip, deflate, br")
req.SetRequestURI(RemoteScraperAddr + "/scrape/" + i.PostID)
if err = client.DoTimeout(req, res, timeout); err == nil && res.StatusCode() == fasthttp.StatusOK {
iDataGunzip, err := res.BodyGunzip()
// The remote scraper's response body is zstd-compressed using a shared dictionary (zstdDict)
decData := make([]byte, 0, 1024)
dec, _ := zstd.NewReader(nil, zstd.WithDecoderDicts(zstdDict))
decData, err = dec.DecodeAll(res.Body(), nil)
if err == nil {
if err = binary.Unmarshal(iDataGunzip, i); err == nil {
if err = binary.Unmarshal(decData, i); err == nil {
log.Info().Str("postID", i.PostID).Msg("Data parsed from remote scraper")
return nil
}
Expand Down
Binary file added handlers/scraper/zstd.dict
Binary file not shown.

0 comments on commit 0e1199b

Please sign in to comment.