Skip to content

Commit

Permalink
Add EscapeWithLegacySemantics
Browse files Browse the repository at this point in the history
WARNING: This commit contains breaking changes.

Combine the EscapeInvalidUTF8 and PreserveRawStrings options
as EscapeWithLegacySemantics. Both options were fairly esoteric,
so combine them as they do not justify being individual options.
  • Loading branch information
dsnet committed Jan 3, 2025
1 parent 6129f21 commit eea6d8f
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 53 deletions.
2 changes: 1 addition & 1 deletion arshal_default.go
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,7 @@ func makeStructArshaler(t reflect.Type) *arshaler {

// Append the token to the output and to the state machine.
n0 := len(b) // offset before calling AppendQuote
if !mo.Flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS | jsonflags.EscapeInvalidUTF8) {
if !mo.Flags.Get(jsonflags.AnyEscape) {
b = append(b, f.quotedName...)
} else {
b, _ = jsonwire.AppendQuote(b, f.name, &mo.Flags)
Expand Down
37 changes: 20 additions & 17 deletions internal/jsonflags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const (
AllowInvalidUTF8 |
EscapeForHTML |
EscapeForJS |
EscapeInvalidUTF8 |
EscapeWithLegacySemantics |
PreserveRawStrings |
Deterministic |
FormatNilMapAsNull |
Expand All @@ -74,28 +74,31 @@ const (
// In contrast to AnyWhitespace, this includes Indent and IndentPrefix
// as those settings take no effect if Multiline is false.
WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix

// AnyEscape is the set of flags related to escaping in a JSON string.
AnyEscape = EscapeForHTML | EscapeForJS | EscapeWithLegacySemantics
)

// Encoder and decoder flags.
const (
initFlag Bools = 1 << iota // reserved for the boolean value itself

AllowDuplicateNames // encode or decode
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize
CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize
EscapeForHTML // encode only
EscapeForJS // encode only
EscapeInvalidUTF8 // encode only; only exposed in v1
Multiline // encode only
SpaceAfterColon // encode only
SpaceAfterComma // encode only
Indent // encode only; non-boolean flag
IndentPrefix // encode only; non-boolean flag
ByteLimit // encode or decode; non-boolean flag
DepthLimit // encode or decode; non-boolean flag
AllowDuplicateNames // encode or decode
AllowInvalidUTF8 // encode or decode
WithinArshalCall // encode or decode; for internal use by json.Marshal and json.Unmarshal
OmitTopLevelNewline // encode only; for internal use by json.Marshal and json.MarshalWrite
PreserveRawStrings // encode only; exposed in v1 and also used by jsontext.Value.Canonicalize
CanonicalizeNumbers // encode only; for internal use by jsontext.Value.Canonicalize
EscapeForHTML // encode only
EscapeForJS // encode only
EscapeWithLegacySemantics // encode only; only exposed in v1
Multiline // encode only
SpaceAfterColon // encode only
SpaceAfterComma // encode only
Indent // encode only; non-boolean flag
IndentPrefix // encode only; non-boolean flag
ByteLimit // encode or decode; non-boolean flag
DepthLimit // encode or decode; non-boolean flag

maxCoderFlag
)
Expand Down
13 changes: 6 additions & 7 deletions internal/jsonwire/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflag
case r == utf8.RuneError && rn == 1:
hasInvalidUTF8 = true
dst = append(dst, src[i:n]...)
if flags.Get(jsonflags.EscapeInvalidUTF8) {
if flags.Get(jsonflags.EscapeWithLegacySemantics) {
dst = append(dst, `\ufffd`...)
} else {
dst = append(dst, "\ufffd"...)
Expand Down Expand Up @@ -158,17 +158,16 @@ func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error
// If the output requires no special escapes, and the input
// is already in canonical form or should be preserved verbatim,
// then directly copy the input to the output.
if !flags.Get(jsonflags.EscapeForHTML|jsonflags.EscapeForJS) &&
if !flags.Get(jsonflags.AnyEscape) &&
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
dst = append(dst, src[:n]...) // copy the string verbatim
return dst, n, nil
}

// If the input should be preserved verbatim, we still need to
// respect the EscapeForHTML and EscapeForJS options.
// Note that EscapeInvalidUTF8 is not respected.
// This logic ensures that pre-escaped sequences remained escaped.
if flags.Get(jsonflags.PreserveRawStrings) {
// Under [jsonflags.EscapeWithLegacySemantics], any pre-escaped sequences
// remain escaped; however, we still need to respect the
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
if flags.Get(jsonflags.EscapeWithLegacySemantics) {
var i, lastAppendIndex int
for i < n {
if c := src[i]; c < utf8.RuneSelf {
Expand Down
1 change: 0 additions & 1 deletion jsontext/value.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ func (v *Value) reformat(canonical, multiline bool, prefix, indent string) error
eo.Flags.Set(jsonflags.PreserveRawStrings | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.EscapeForHTML | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.EscapeForJS | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.EscapeInvalidUTF8 | 0) // per RFC 8785, section 3.2.2.2
eo.Flags.Set(jsonflags.Multiline | 0) // per RFC 8785, section 3.2.1
} else {
if s := strings.TrimLeft(prefix, " \t"); len(s) > 0 {
Expand Down
3 changes: 1 addition & 2 deletions migrate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,12 @@ ISSUE=63397 # TODO: Replace with formal proposal issue for encoding/json/v2
FILE=$(cd $GOROOT/api; ls -v | tail -n 1)
echo "pkg encoding/json, func CallMethodsWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func DefaultOptionsV1() jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func EscapeInvalidUTF8(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func EscapeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func FormatBytesWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func FormatTimeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func MatchCaseSensitiveDelimiter(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func MergeWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func OmitEmptyWithLegacyDefinition(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func PreserveRawStrings(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func RejectFloatOverflow(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func ReportErrorsWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
echo "pkg encoding/json, func StringifyWithLegacySemantics(bool) jsonopts.Options #$ISSUE" >> $GOROOT/api/$FILE
Expand Down
43 changes: 18 additions & 25 deletions v1/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ type Options = jsonopts.Options
// It is equivalent to the following boolean options being set to true:
//
// - [CallMethodsWithLegacySemantics]
// - [EscapeInvalidUTF8]
// - [EscapeWithLegacySemantics]
// - [FormatBytesWithLegacySemantics]
// - [FormatTimeWithLegacySemantics]
// - [MatchCaseSensitiveDelimiter]
Expand Down Expand Up @@ -108,19 +108,28 @@ func CallMethodsWithLegacySemantics(v bool) Options {
}
}

// EscapeInvalidUTF8 specifies that bytes of invalid UTF-8 within JSON strings
// should be escaped as a hexadecimal Unicode codepoint (i.e., \ufffd)
// of the Unicode replacement character as opposed to being encoded
// as the Unicode replacement character verbatim (without escaping).
// This option has no effect if [jsontext.AllowInvalidUTF8] is false.
// EscapeWithLegacySemantics specifies that JSON strings are escaped
// with legacy semantics:
//
// - When encoding a literal [jsontext.Token] with bytes of invalid UTF-8,
// such bytes are escaped as a hexadecimal Unicode codepoint (i.e., \ufffd).
// In contrast, the v2 default is to use the minimal representation,
// which is to encode invalid UTF-8 as the Unicode replacement rune itself
// (without any form of escaping).
//
// - When encoding a raw [jsontext.Token] or [jsontext.Value],
// pre-escaped sequences in a JSON string are preserved in the output.
// In contrast, the v2 default is to use the minimal representation,
// and only escape what is necessary to satisfy the
// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] options.
//
// This only affects encoding and is ignored when decoding.
// The v1 default is true.
func EscapeInvalidUTF8(v bool) Options {
func EscapeWithLegacySemantics(v bool) Options {
if v {
return jsonflags.EscapeInvalidUTF8 | 1
return jsonflags.EscapeWithLegacySemantics | 1
} else {
return jsonflags.EscapeInvalidUTF8 | 0
return jsonflags.EscapeWithLegacySemantics | 0
}
}

Expand Down Expand Up @@ -254,22 +263,6 @@ func OmitEmptyWithLegacyDefinition(v bool) Options {
}
}

// PreserveRawStrings specifies that raw JSON string values passed to
// [jsontext.Encoder.WriteValue] and [jsontext.Encoder.WriteToken]
// preserve their original encoding.
// However, characters that still need escaping according to
// [jsontext.EscapeForHTML] and [jsontext.EscapeForJS] are escaped.
//
// This only affects encoding and is ignored when decoding.
// The v1 default is true.
func PreserveRawStrings(v bool) Options {
if v {
return jsonflags.PreserveRawStrings | 1
} else {
return jsonflags.PreserveRawStrings | 0
}
}

// RejectFloatOverflow specifies that unmarshaling a JSON number that
// exceeds the maximum representation of a Go float32 or float64
// results in an error, rather than succeeding with the floating-point values
Expand Down

0 comments on commit eea6d8f

Please sign in to comment.