From eea6d8f23b302a71e02055024c9b5790bfea2598 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Fri, 3 Jan 2025 12:08:30 -0800 Subject: [PATCH] Add EscapeWithLegacySemantics WARNING: This commit contains breaking changes. Combine the EscapeInvalidUTF8 and PreserveRawStrings options as EscapeWithLegacySemantics. Both options were fairly esoteric, so combine them as they do not justify being individual options. --- arshal_default.go | 2 +- internal/jsonflags/flags.go | 37 ++++++++++++++++--------------- internal/jsonwire/encode.go | 13 ++++++----- jsontext/value.go | 1 - migrate.sh | 3 +-- v1/options.go | 43 ++++++++++++++++--------------------- 6 files changed, 46 insertions(+), 53 deletions(-) diff --git a/arshal_default.go b/arshal_default.go index b759991..c5b3029 100644 --- a/arshal_default.go +++ b/arshal_default.go @@ -1097,7 +1097,7 @@ func makeStructArshaler(t reflect.Type) *arshaler { // Append the token to the output and to the state machine. n0 := len(b) // offset before calling AppendQuote - if !mo.Flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS | jsonflags.EscapeInvalidUTF8) { + if !mo.Flags.Get(jsonflags.AnyEscape) { b = append(b, f.quotedName...) } else { b, _ = jsonwire.AppendQuote(b, f.name, &mo.Flags) diff --git a/internal/jsonflags/flags.go b/internal/jsonflags/flags.go index a50ef8c..ce10709 100644 --- a/internal/jsonflags/flags.go +++ b/internal/jsonflags/flags.go @@ -50,7 +50,7 @@ const ( AllowInvalidUTF8 | EscapeForHTML | EscapeForJS | - EscapeInvalidUTF8 | + EscapeWithLegacySemantics | PreserveRawStrings | Deterministic | FormatNilMapAsNull | @@ -74,28 +74,31 @@ const ( // In contrast to AnyWhitespace, this includes Indent and IndentPrefix // as those settings take no effect if Multiline is false. WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix + + // AnyEscape is the set of flags related to escaping in a JSON string. + AnyEscape = EscapeForHTML | EscapeForJS | EscapeWithLegacySemantics ) // Encoder and decoder flags. 
const ( initFlag Bools = 1 << iota // reserved for the boolean value itself - AllowDuplicateNames // encode or decode - AllowInvalidUTF8 // encode or decode - WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal - OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite - PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize - CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize - EscapeForHTML // encode only - EscapeForJS // encode only - EscapeInvalidUTF8 // encode only; only exposed in v1 - Multiline // encode only - SpaceAfterColon // encode only - SpaceAfterComma // encode only - Indent // encode only; non-boolean flag - IndentPrefix // encode only; non-boolean flag - ByteLimit // encode or decode; non-boolean flag - DepthLimit // encode or decode; non-boolean flag + AllowDuplicateNames // encode or decode + AllowInvalidUTF8 // encode or decode + WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal + OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite + PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize + CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize + EscapeForHTML // encode only + EscapeForJS // encode only + EscapeWithLegacySemantics // encode only; only exposed in v1 + Multiline // encode only + SpaceAfterColon // encode only + SpaceAfterComma // encode only + Indent // encode only; non-boolean flag + IndentPrefix // encode only; non-boolean flag + ByteLimit // encode or decode; non-boolean flag + DepthLimit // encode or decode; non-boolean flag maxCoderFlag ) diff --git a/internal/jsonwire/encode.go b/internal/jsonwire/encode.go index 366aa49..f297a2c 100644 --- a/internal/jsonwire/encode.go +++ b/internal/jsonwire/encode.go @@ -85,7 +85,7 @@ func 
AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflag case r == utf8.RuneError && rn == 1: hasInvalidUTF8 = true dst = append(dst, src[i:n]...) - if flags.Get(jsonflags.EscapeInvalidUTF8) { + if flags.Get(jsonflags.EscapeWithLegacySemantics) { dst = append(dst, `\ufffd`...) } else { dst = append(dst, "\ufffd"...) @@ -158,17 +158,16 @@ func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error // If the output requires no special escapes, and the input // is already in canonical form or should be preserved verbatim, // then directly copy the input to the output. - if !flags.Get(jsonflags.EscapeForHTML|jsonflags.EscapeForJS) && + if !flags.Get(jsonflags.AnyEscape) && (valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) { dst = append(dst, src[:n]...) // copy the string verbatim return dst, n, nil } - // If the input should be preserved verbatim, we still need to - // respect the EscapeForHTML and EscapeForJS options. - // Note that EscapeInvalidUTF8 is not respected. - // This logic ensures that pre-escaped sequences remained escaped. - if flags.Get(jsonflags.PreserveRawStrings) { + // Under [jsonflags.EscapeWithLegacySemantics], any pre-escaped sequences + // remain escaped, however we still need to respect the + // [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options. 
+ if flags.Get(jsonflags.EscapeWithLegacySemantics) { var i, lastAppendIndex int for i < n { if c := src[i]; c < utf8.RuneSelf { diff --git a/jsontext/value.go b/jsontext/value.go index 97c97b1..912088c 100644 --- a/jsontext/value.go +++ b/jsontext/value.go @@ -150,7 +150,6 @@ func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error eo.Flags.Set(jsonflags.PreserveRawStrings | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.EscapeForHTML | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.EscapeForJS | 0) // per RFC 8785, section 3.2.2.2 - eo.Flags.Set(jsonflags.EscapeInvalidUTF8 | 0) // per RFC 8785, section 3.2.2.2 eo.Flags.Set(jsonflags.Multiline | 0) // per RFC 8785, section 3.2.1 } else { if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 { diff --git a/migrate.sh b/migrate.sh index 0685176..f495313 100755 --- a/migrate.sh +++ b/migrate.sh @@ -72,13 +72,12 @@ ISSUE=63397 # TODO: Replace with formal proposal issue for encoding/json/v2 FILE=$(cd $GOROOT/api; ls -v | tail -n 1) echo "pkg encoding/json, func CallMethodsWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func DefaultOptionsV1() jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE -echo "pkg encoding/json, func EscapeInvalidUTF8(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE +echo "pkg encoding/json, func EscapeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func FormatBytesWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func FormatTimeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func MatchCaseSensitiveDelimiter(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func MergeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func OmitEmptyWithLegacyDefinition(bool) jsonopts.Options #$ISSUE" 
>> $GOROOT/api/$FILE -echo "pkg encoding/json, func PreserveRawStrings(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func RejectFloatOverflow(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func ReportErrorsWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE echo "pkg encoding/json, func StringifyWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE diff --git a/v1/options.go b/v1/options.go index 3d0ee3b..699a499 100644 --- a/v1/options.go +++ b/v1/options.go @@ -32,7 +32,7 @@ type Options = jsonopts.Options // It is equivalent to the following boolean options being set to true: // // - [CallMethodsWithLegacySemantics] -// - [EscapeInvalidUTF8] +// - [EscapeWithLegacySemantics] // - [FormatBytesWithLegacySemantics] // - [FormatTimeWithLegacySemantics] // - [MatchCaseSensitiveDelimiter] @@ -108,19 +108,28 @@ func CallMethodsWithLegacySemantics(v bool) Options { } } -// EscapeInvalidUTF8 specifies that bytes of invalid UTF-8 within JSON strings -// should be escaped as a hexadecimal Unicode codepoint (i.e., \ufffd) -// of the Unicode replacement character as opposed to being encoded -// as the Unicode replacement character verbatim (without escaping). -// This option has no effect if [jsontext.AllowInvalidUTF8] is false. +// EscapeWithLegacySemantics specifies that JSON strings are escaped +// with legacy semantics: +// +// - When encoding a literal [jsontext.Token] with bytes of invalid UTF-8, +// such bytes are escaped as a hexadecimal Unicode codepoint (i.e., \ufffd). +// In contrast, the v2 default is to use the minimal representation, +// which is to encode invalid UTF-8 as the Unicode replacement rune itself +// (without any form of escaping). +// +// - When encoding a raw [jsontext.Token] or [jsontext.Value], +// pre-escaped sequences in a JSON string are preserved to the output. 
+// In contrast, the v2 default is to use the minimal representation, +// and only escape what is necessary to satisfy the +// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] options. // // This only affects encoding and is ignored when decoding. // The v1 default is true. -func EscapeInvalidUTF8(v bool) Options { +func EscapeWithLegacySemantics(v bool) Options { if v { - return jsonflags.EscapeInvalidUTF8 | 1 + return jsonflags.EscapeWithLegacySemantics | 1 } else { - return jsonflags.EscapeInvalidUTF8 | 0 + return jsonflags.EscapeWithLegacySemantics | 0 } } @@ -254,22 +263,6 @@ func OmitEmptyWithLegacyDefinition(v bool) Options { } } -// PreserveRawStrings specifies that raw JSON string values passed to -// [jsontext.Encoder.WriteValue] and [jsontext.Encoder.WriteToken] -// preserve their original encoding. -// However, characters that still need escaping according to -// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] are escaped. -// -// This only affects encoding and is ignored when decoding. -// The v1 default is true. -func PreserveRawStrings(v bool) Options { - if v { - return jsonflags.PreserveRawStrings | 1 - } else { - return jsonflags.PreserveRawStrings | 0 - } -} - // RejectFloatOverflow specifies that unmarshaling a JSON number that // exceeds the maximum representation of a Go float32 or float64 // results in an error, rather than succeeding with the floating-point values