diff --git a/handlers/embed.go b/handlers/embed.go index a453b5e..fc4156b 100644 --- a/handlers/embed.go +++ b/handlers/embed.go @@ -130,7 +130,7 @@ func Embed(w http.ResponseWriter, r *http.Request) { if r.TLS != nil { scheme = "https" } - viewsData.OEmbedURL = scheme + "://" + r.Host + "/oembed?text=" + url.QueryEscape(viewsData.Description) + "&url=" + viewsData.URL + viewsData.OEmbedURL = scheme + "://" + r.Host + "/oembed?text=" + url.QueryEscape(`"`+viewsData.Description) + "&url=" + viewsData.URL } if isDirect { http.Redirect(w, r, sb.String(), http.StatusFound) diff --git a/handlers/oembed.go b/handlers/oembed.go index c026521..97b6e43 100644 --- a/handlers/oembed.go +++ b/handlers/oembed.go @@ -1,11 +1,10 @@ package handlers import ( + "instafix/utils" "instafix/views" "instafix/views/model" "net/http" - - "github.com/PurpleSec/escape" ) func OEmbed(w http.ResponseWriter, r *http.Request) { @@ -22,7 +21,7 @@ func OEmbed(w http.ResponseWriter, r *http.Request) { // Totally safe 100% valid template 👍 OEmbedData := &model.OEmbedData{ - Text: escape.JSON(headingText), + Text: utils.EscapeJSONString(headingText), URL: headingURL, } diff --git a/handlers/scraper/data.go b/handlers/scraper/data.go index b83a49b..c7e234c 100644 --- a/handlers/scraper/data.go +++ b/handlers/scraper/data.go @@ -14,7 +14,6 @@ import ( "time" "github.com/PuerkitoBio/goquery" - "github.com/PurpleSec/escape" "github.com/kelindar/binary" "github.com/klauspost/compress/gzhttp" "github.com/klauspost/compress/zstd" @@ -385,7 +384,7 @@ func scrapeFromEmbedHTML(embedHTML []byte) (string, error) { "shortcode_media": { "owner": {"username": "` + username + `"}, "node": {"__typename": "` + typename + `", "display_url": "` + mediaURL + `"}, - "edge_media_to_caption": {"edges": [{"node": {"text": ` + escape.JSON(caption) + `}}]}, + "edge_media_to_caption": {"edges": [{"node": {"text": ` + utils.EscapeJSONString(caption) + `}}]}, "dimensions": {"height": null, "width": null}, "video_blocked": ` + videoBlocked + ` } diff --git a/utils/jsonesc.go b/utils/jsonesc.go index 702bc7b..bdc7e84 100644 --- a/utils/jsonesc.go +++ b/utils/jsonesc.go @@ -7,8 +7,111 @@ import ( "strconv" "strings" "unicode/utf16" + "unicode/utf8" ) +// Copied from encoding/json +const hex = "0123456789abcdef" + +var safeSet = [utf8.RuneSelf]bool{ + ' ': true, + '!': true, + '"': false, + '#': true, + '$': true, + '%': true, + '&': true, + '\'': true, + '(': true, + ')': true, + '*': true, + '+': true, + ',': true, + '-': true, + '.': true, + '/': true, + '0': true, + '1': true, + '2': true, + '3': true, + '4': true, + '5': true, + '6': true, + '7': true, + '8': true, + '9': true, + ':': true, + ';': true, + '<': true, + '=': true, + '>': true, + '?': true, + '@': true, + 'A': true, + 'B': true, + 'C': true, + 'D': true, + 'E': true, + 'F': true, + 'G': true, + 'H': true, + 'I': true, + 'J': true, + 'K': true, + 'L': true, + 'M': true, + 'N': true, + 'O': true, + 'P': true, + 'Q': true, + 'R': true, + 'S': true, + 'T': true, + 'U': true, + 'V': true, + 'W': true, + 'X': true, + 'Y': true, + 'Z': true, + '[': true, + '\\': false, + ']': true, + '^': true, + '_': true, + '`': true, + 'a': true, + 'b': true, + 'c': true, + 'd': true, + 'e': true, + 'f': true, + 'g': true, + 'h': true, + 'i': true, + 'j': true, + 'k': true, + 'l': true, + 'm': true, + 'n': true, + 'o': true, + 'p': true, + 'q': true, + 'r': true, + 's': true, + 't': true, + 'u': true, + 'v': true, + 'w': true, + 'x': true, + 'y': true, + 'z': true, + '{': true, + '|': true, + '}': true, + '~': true, + '\u007f': true, +} + func UnescapeJSONString(s string) string { n := strings.IndexByte(s, '\\') if n < 0 { @@ -89,3 +192,89 @@ func UnescapeJSONString(s string) string { } return B2S(b) } + +func EscapeJSONString(src string) string { + sb := strings.Builder{} + sb.Grow(len(src)) + sb.WriteByte('"') + start := 0 + for i := 0; i < len(src); { + if b := src[i]; b < utf8.RuneSelf { + if safeSet[b] { + i++ + continue + } + sb.WriteString(src[start:i]) + switch b { + case '\\', '"': + sb.WriteByte('\\') + sb.WriteByte(b) + case '\b': + sb.WriteByte('\\') + sb.WriteByte('b') + case '\f': + sb.WriteByte('\\') + sb.WriteByte('f') + case '\n': + sb.WriteByte('\\') + sb.WriteByte('n') + case '\r': + sb.WriteByte('\\') + sb.WriteByte('r') + case '\t': + sb.WriteByte('\\') + sb.WriteByte('t') + default: + // This encodes bytes < 0x20 except for \b, \f, \n, \r and \t. + // If escapeHTML is set, it also escapes <, >, and & + // because they can lead to security holes when + // user-controlled strings are rendered into JSON + // and served to some browsers. + sb.WriteByte('\\') + sb.WriteByte('u') + sb.WriteByte('0') + sb.WriteByte('0') + sb.WriteByte(hex[b>>4]) + sb.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + // TODO(https://go.dev/issue/56948): Use generic utf8 functionality. + // For now, cast only a small portion of byte slices to a string + // so that it can be stack allocated. This slows down []byte slightly + // due to the extra copy, but keeps string performance roughly the same. + n := len(src) - i + if n > utf8.UTFMax { + n = utf8.UTFMax + } + c, size := utf8.DecodeRuneInString(string(src[i : i+n])) + if c == utf8.RuneError && size == 1 { + sb.WriteString(src[start:i]) + sb.WriteString(`\ufffd`) + i += size + start = i + continue + } + // U+2028 is LINE SEPARATOR. + // U+2029 is PARAGRAPH SEPARATOR. + // They are both technically valid characters in JSON strings, + // but don't work in JSONP, which has to be evaluated as JavaScript, + // and can lead to security holes there. It is valid JSON to + // escape them, so we do so unconditionally. + // See https://en.wikipedia.org/wiki/JSON#Safety. + if c == '\u2028' || c == '\u2029' { + sb.WriteString(src[start:i]) + sb.WriteString(`\u202`) + sb.WriteByte(hex[c&0xF]) + i += size + start = i + continue + } + i += size + } + sb.WriteString(src[start:]) + sb.WriteByte('"') + return sb.String() +}