From dfb90aa8d2024f8ad3b8b3587e981d4e84740522 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Sat, 14 Dec 2024 18:02:25 -0800 Subject: [PATCH] Implement v1 in terms of v2 Remove all the core functionality of v1 and instead implement it in terms of v2 with the appropriate options specified. Many v1 test cases currently failure because the v1 emulation in v2 is not a sufficient reproduction of historial v1 behavior. The list of known failures are stored in v1/failing.txt, which allows such failing tests to be skipped. As the v1 emulation in v2 improves, the failing.txt list should eventually become empty. --- v1/bench_test.go | 92 ---- v1/decode.go | 1174 ++------------------------------------------ v1/decode_test.go | 18 +- v1/encode.go | 1115 +---------------------------------------- v1/encode_test.go | 11 + v1/failing.txt | 184 +++++++ v1/failing_test.go | 92 ++++ v1/fold.go | 48 -- v1/fold_test.go | 50 -- v1/indent.go | 148 ++---- v1/number_test.go | 118 ----- v1/options.go | 10 + v1/scanner.go | 609 ++--------------------- v1/scanner_test.go | 1 + v1/stream.go | 457 ++++------------- v1/stream_test.go | 43 +- v1/tables.go | 218 -------- v1/tagkey_test.go | 5 +- v1/tags.go | 38 -- v1/tags_test.go | 28 -- 20 files changed, 537 insertions(+), 3922 deletions(-) create mode 100644 v1/failing.txt create mode 100644 v1/failing_test.go delete mode 100644 v1/fold.go delete mode 100644 v1/fold_test.go delete mode 100644 v1/number_test.go delete mode 100644 v1/tables.go delete mode 100644 v1/tags.go delete mode 100644 v1/tags_test.go diff --git a/v1/bench_test.go b/v1/bench_test.go index 43a7963..fae5bc9 100644 --- a/v1/bench_test.go +++ b/v1/bench_test.go @@ -12,13 +12,8 @@ package json import ( "bytes" - "fmt" "io" - "reflect" - "regexp" - "runtime" "strings" - "sync" "testing" "github.com/go-json-experiment/json/internal/jsontest" @@ -451,67 +446,6 @@ func BenchmarkUnmapped(b *testing.B) { }) } -func BenchmarkTypeFieldsCache(b *testing.B) { - b.ReportAllocs() - var maxTypes int = 1e6 - - // Dynamically generate many new types. - types := make([]reflect.Type, maxTypes) - fs := []reflect.StructField{{ - Type: reflect.TypeFor[string](), - Index: []int{0}, - }} - for i := range types { - fs[0].Name = fmt.Sprintf("TypeFieldsCache%d", i) - types[i] = reflect.StructOf(fs) - } - - // clearClear clears the cache. Other JSON operations, must not be running. - clearCache := func() { - fieldCache = sync.Map{} - } - - // MissTypes tests the performance of repeated cache misses. - // This measures the time to rebuild a cache of size nt. - for nt := 1; nt <= maxTypes; nt *= 10 { - ts := types[:nt] - b.Run(fmt.Sprintf("MissTypes%d", nt), func(b *testing.B) { - nc := runtime.GOMAXPROCS(0) - for i := 0; i < b.N; i++ { - clearCache() - var wg sync.WaitGroup - for j := 0; j < nc; j++ { - wg.Add(1) - go func(j int) { - for _, t := range ts[(j*len(ts))/nc : ((j+1)*len(ts))/nc] { - cachedTypeFields(t) - } - wg.Done() - }(j) - } - wg.Wait() - } - }) - } - - // HitTypes tests the performance of repeated cache hits. - // This measures the average time of each cache lookup. - for nt := 1; nt <= maxTypes; nt *= 10 { - // Pre-warm a cache of size nt. - clearCache() - for _, t := range types[:nt] { - cachedTypeFields(t) - } - b.Run(fmt.Sprintf("HitTypes%d", nt), func(b *testing.B) { - b.RunParallel(func(pb *testing.PB) { - for pb.Next() { - cachedTypeFields(types[0]) - } - }) - }) - } -} - func BenchmarkEncodeMarshaler(b *testing.B) { b.ReportAllocs() @@ -545,29 +479,3 @@ func BenchmarkEncoderEncode(b *testing.B) { } }) } - -func BenchmarkNumberIsValid(b *testing.B) { - s := "-61657.61667E+61673" - for i := 0; i < b.N; i++ { - isValidNumber(s) - } -} - -func BenchmarkNumberIsValidRegexp(b *testing.B) { - var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) - s := "-61657.61667E+61673" - for i := 0; i < b.N; i++ { - jsonNumberRegexp.MatchString(s) - } -} - -func BenchmarkUnmarshalNumber(b *testing.B) { - b.ReportAllocs() - data := []byte(`"-61657.61667E+61673"`) - var number Number - for i := 0; i < b.N; i++ { - if err := Unmarshal(data, &number); err != nil { - b.Fatal("Unmarshal:", err) - } - } -} diff --git a/v1/decode.go b/v1/decode.go index 9810229..d66936f 100644 --- a/v1/decode.go +++ b/v1/decode.go @@ -8,16 +8,13 @@ package json import ( - "encoding" - "encoding/base64" "fmt" "reflect" "strconv" - "strings" - "unicode" - "unicode/utf16" - "unicode/utf8" - _ "unsafe" // for linkname + + jsonv2 "github.com/go-json-experiment/json" + "github.com/go-json-experiment/json/internal/jsonwire" + "github.com/go-json-experiment/json/jsontext" ) // Unmarshal parses the JSON-encoded data and stores the result @@ -95,17 +92,7 @@ import ( // Instead, they are replaced by the Unicode replacement // character U+FFFD. func Unmarshal(data []byte, v any) error { - // Check for well-formedness. - // Avoids filling out half a data structure - // before discovering a JSON syntax error. - var d decodeState - err := checkValid(data, &d.scan) - if err != nil { - return err - } - - d.init(data) - return d.unmarshal(v) + return jsonv2.Unmarshal(data, v, DefaultOptionsV1()) } // Unmarshaler is the interface implemented by types @@ -116,9 +103,7 @@ func Unmarshal(data []byte, v any) error { // // By convention, to approximate the behavior of [Unmarshal] itself, // Unmarshalers implement UnmarshalJSON([]byte("null")) as a no-op. -type Unmarshaler interface { - UnmarshalJSON([]byte) error -} +type Unmarshaler = jsonv2.UnmarshalerV1 // An UnmarshalTypeError describes a JSON value that was // not appropriate for a value of a specific Go type. @@ -168,23 +153,6 @@ func (e *InvalidUnmarshalError) Error() string { return "json: Unmarshal(nil " + e.Type.String() + ")" } -func (d *decodeState) unmarshal(v any) error { - rv := reflect.ValueOf(v) - if rv.Kind() != reflect.Pointer || rv.IsNil() { - return &InvalidUnmarshalError{reflect.TypeOf(v)} - } - - d.scan.reset() - d.scanWhile(scanSkipSpace) - // We decode rv not rv.Elem because the Unmarshaler interface - // test must be applied at the top level of the value. - err := d.value(rv) - if err != nil { - return d.addErrorContext(err) - } - return d.savedError -} - // A Number represents a JSON number literal. type Number string @@ -201,1112 +169,54 @@ func (n Number) Int64() (int64, error) { return strconv.ParseInt(string(n), 10, 64) } -// An errorContext provides context for type errors during decoding. -type errorContext struct { - Struct reflect.Type - FieldStack []string -} - -// decodeState represents the state while decoding a JSON value. -type decodeState struct { - data []byte - off int // next read offset in data - opcode int // last read result - scan scanner - errorContext *errorContext - savedError error - useNumber bool - disallowUnknownFields bool -} - -// readIndex returns the position of the last byte read. -func (d *decodeState) readIndex() int { - return d.off - 1 -} - -// phasePanicMsg is used as a panic message when we end up with something that -// shouldn't happen. It can indicate a bug in the JSON decoder, or that -// something is editing the data slice while the decoder executes. -const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" - -func (d *decodeState) init(data []byte) *decodeState { - d.data = data - d.off = 0 - d.savedError = nil - if d.errorContext != nil { - d.errorContext.Struct = nil - // Reuse the allocated space for the FieldStack slice. - d.errorContext.FieldStack = d.errorContext.FieldStack[:0] - } - return d -} - -// saveError saves the first err it is called with, -// for reporting at the end of the unmarshal. -func (d *decodeState) saveError(err error) { - if d.savedError == nil { - d.savedError = d.addErrorContext(err) - } -} - -// addErrorContext returns a new error enhanced with information from d.errorContext -func (d *decodeState) addErrorContext(err error) error { - if d.errorContext != nil && (d.errorContext.Struct != nil || len(d.errorContext.FieldStack) > 0) { - switch err := err.(type) { - case *UnmarshalTypeError: - err.Struct = d.errorContext.Struct.Name() - fieldStack := d.errorContext.FieldStack - if err.Field != "" { - fieldStack = append(fieldStack, err.Field) - } - err.Field = strings.Join(fieldStack, ".") - } - } - return err -} - -// skip scans to the end of what was started. -func (d *decodeState) skip() { - s, data, i := &d.scan, d.data, d.off - depth := len(s.parseState) - for { - op := s.step(s, data[i]) - i++ - if len(s.parseState) < depth { - d.off = i - d.opcode = op - return - } - } -} - -// scanNext processes the byte at d.data[d.off]. -func (d *decodeState) scanNext() { - if d.off < len(d.data) { - d.opcode = d.scan.step(&d.scan, d.data[d.off]) - d.off++ - } else { - d.opcode = d.scan.eof() - d.off = len(d.data) + 1 // mark processed EOF with len+1 - } -} - -// scanWhile processes bytes in d.data[d.off:] until it -// receives a scan code not equal to op. -func (d *decodeState) scanWhile(op int) { - s, data, i := &d.scan, d.data, d.off - for i < len(data) { - newOp := s.step(s, data[i]) - i++ - if newOp != op { - d.opcode = newOp - d.off = i - return - } - } - - d.off = len(data) + 1 // mark processed EOF with len+1 - d.opcode = d.scan.eof() -} +var numberType = reflect.TypeFor[Number]() -// rescanLiteral is similar to scanWhile(scanContinue), but it specialises the -// common case where we're decoding a literal. The decoder scans the input -// twice, once for syntax errors and to check the length of the value, and the -// second to perform the decoding. -// -// Only in the second step do we use decodeState to tokenize literals, so we -// know there aren't any syntax errors. We can take advantage of that knowledge, -// and scan a literal's bytes much more quickly. -func (d *decodeState) rescanLiteral() { - data, i := d.data, d.off -Switch: - switch data[i-1] { - case '"': // string - for ; i < len(data); i++ { - switch data[i] { - case '\\': - i++ // escaped char - case '"': - i++ // tokenize the closing quote too - break Switch - } - } - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number - for ; i < len(data); i++ { - switch data[i] { - case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - '.', 'e', 'E', '+', '-': - default: - break Switch - } - } - case 't': // true - i += len("rue") - case 'f': // false - i += len("alse") - case 'n': // null - i += len("ull") - } - if i < len(data) { - d.opcode = stateEndValue(&d.scan, data[i]) +// MarshalJSONV2 implements [jsonv2.MarshalerV2]. +func (n Number) MarshalJSONV2(enc *jsontext.Encoder, opts jsonv2.Options) error { + if n == "" { + return enc.WriteToken(jsontext.Int(0)) + } + var num []byte + val := enc.UnusedBuffer() + if stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers); stringify { + val = append(val, '"') + val = append(val, n...) + val = append(val, '"') + num = val[len(`"`) : len(val)-len(`"`)] } else { - d.opcode = scanEnd + val = append(val, n...) + num = val } - d.off = i + 1 -} - -// value consumes a JSON value from d.data[d.off-1:], decoding into v, and -// reads the following byte ahead. If v is invalid, the value is discarded. -// The first byte of the value has been read already. -func (d *decodeState) value(v reflect.Value) error { - switch d.opcode { - default: - panic(phasePanicMsg) - - case scanBeginArray: - if v.IsValid() { - if err := d.array(v); err != nil { - return err - } - } else { - d.skip() - } - d.scanNext() - - case scanBeginObject: - if v.IsValid() { - if err := d.object(v); err != nil { - return err - } - } else { - d.skip() - } - d.scanNext() - - case scanBeginLiteral: - // All bytes inside literal return scanContinue op code. - start := d.readIndex() - d.rescanLiteral() - - if v.IsValid() { - if err := d.literalStore(d.data[start:d.readIndex()], v, false); err != nil { - return err - } - } - } - return nil -} - -type unquotedValue struct{} - -// valueQuoted is like value but decodes a -// quoted string literal or literal null into an interface value. -// If it finds anything other than a quoted string literal or null, -// valueQuoted returns unquotedValue{}. -func (d *decodeState) valueQuoted() any { - switch d.opcode { - default: - panic(phasePanicMsg) - - case scanBeginArray, scanBeginObject: - d.skip() - d.scanNext() - - case scanBeginLiteral: - v := d.literalInterface() - switch v.(type) { - case nil, string: - return v - } + if n, err := jsonwire.ConsumeNumber(num); n != len(num) || err != nil { + return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) } - return unquotedValue{} + return enc.WriteValue(val) } -// indirect walks down v allocating pointers as needed, -// until it gets to a non-pointer. -// If it encounters an Unmarshaler, indirect stops and returns that. -// If decodingNull is true, indirect stops at the first settable pointer so it -// can be set to nil. -func indirect(v reflect.Value, decodingNull bool) (Unmarshaler, encoding.TextUnmarshaler, reflect.Value) { - // Issue #24153 indicates that it is generally not a guaranteed property - // that you may round-trip a reflect.Value by calling Value.Addr().Elem() - // and expect the value to still be settable for values derived from - // unexported embedded struct fields. - // - // The logic below effectively does this when it first addresses the value - // (to satisfy possible pointer methods) and continues to dereference - // subsequent pointers as necessary. - // - // After the first round-trip, we set v back to the original value to - // preserve the original RW flags contained in reflect.Value. - v0 := v - haveAddr := false - - // If v is a named type and is addressable, - // start with its address, so that if the type has pointer methods, - // we find them. - if v.Kind() != reflect.Pointer && v.Type().Name() != "" && v.CanAddr() { - haveAddr = true - v = v.Addr() - } - for { - // Load value from interface, but only if the result will be - // usefully addressable. - if v.Kind() == reflect.Interface && !v.IsNil() { - e := v.Elem() - if e.Kind() == reflect.Pointer && !e.IsNil() && (!decodingNull || e.Elem().Kind() == reflect.Pointer) { - haveAddr = false - v = e - continue - } - } - - if v.Kind() != reflect.Pointer { - break - } - - if decodingNull && v.CanSet() { - break - } - - // Prevent infinite loop if v is an interface pointing to its own address: - // var v any - // v = &v - if v.Elem().Kind() == reflect.Interface && v.Elem().Elem().Equal(v) { - v = v.Elem() - break - } - if v.IsNil() { - v.Set(reflect.New(v.Type().Elem())) - } - if v.Type().NumMethod() > 0 && v.CanInterface() { - if u, ok := v.Interface().(Unmarshaler); ok { - return u, nil, reflect.Value{} - } - if !decodingNull { - if u, ok := v.Interface().(encoding.TextUnmarshaler); ok { - return nil, u, reflect.Value{} - } - } - } - - if haveAddr { - v = v0 // restore original value after round-trip Value.Addr().Elem() - haveAddr = false - } else { - v = v.Elem() - } - } - return nil, nil, v -} - -// array consumes an array from d.data[d.off-1:], decoding into v. -// The first byte of the array ('[') has been read already. -func (d *decodeState) array(v reflect.Value) error { - // Check for unmarshaler. - u, ut, pv := indirect(v, false) - if u != nil { - start := d.readIndex() - d.skip() - return u.UnmarshalJSON(d.data[start:d.off]) - } - if ut != nil { - d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - } - v = pv - - // Check type of target. - switch v.Kind() { - case reflect.Interface: - if v.NumMethod() == 0 { - // Decoding into nil interface? Switch to non-reflect code. - ai := d.arrayInterface() - v.Set(reflect.ValueOf(ai)) - return nil - } - // Otherwise it's invalid. - fallthrough - default: - d.saveError(&UnmarshalTypeError{Value: "array", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - case reflect.Array, reflect.Slice: - break - } - - i := 0 - for { - // Look ahead for ] - can only happen on first iteration. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndArray { - break - } - - // Expand slice length, growing the slice if necessary. - if v.Kind() == reflect.Slice { - if i >= v.Cap() { - v.Grow(1) - } - if i >= v.Len() { - v.SetLen(i + 1) - } - } - - if i < v.Len() { - // Decode into element. - if err := d.value(v.Index(i)); err != nil { - return err - } - } else { - // Ran out of fixed array: skip. - if err := d.value(reflect.Value{}); err != nil { - return err - } - } - i++ - - // Next token must be , or ]. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndArray { - break - } - if d.opcode != scanArrayValue { - panic(phasePanicMsg) - } - } - - if i < v.Len() { - if v.Kind() == reflect.Array { - for ; i < v.Len(); i++ { - v.Index(i).SetZero() // zero remainder of array - } - } else { - v.SetLen(i) // truncate the slice - } - } - if i == 0 && v.Kind() == reflect.Slice { - v.Set(reflect.MakeSlice(v.Type(), 0, 0)) - } - return nil -} - -var nullLiteral = []byte("null") -var textUnmarshalerType = reflect.TypeFor[encoding.TextUnmarshaler]() - -// object consumes an object from d.data[d.off-1:], decoding into v. -// The first byte ('{') of the object has been read already. -func (d *decodeState) object(v reflect.Value) error { - // Check for unmarshaler. - u, ut, pv := indirect(v, false) - if u != nil { - start := d.readIndex() - d.skip() - return u.UnmarshalJSON(d.data[start:d.off]) - } - if ut != nil { - d.saveError(&UnmarshalTypeError{Value: "object", Type: v.Type(), Offset: int64(d.off)}) - d.skip() - return nil - } - v = pv - t := v.Type() - - // Decoding into nil interface? Switch to non-reflect code. - if v.Kind() == reflect.Interface && v.NumMethod() == 0 { - oi := d.objectInterface() - v.Set(reflect.ValueOf(oi)) - return nil - } - - var fields structFields - - // Check type of target: - // struct or - // map[T1]T2 where T1 is string, an integer type, - // or an encoding.TextUnmarshaler - switch v.Kind() { - case reflect.Map: - // Map key must either have string kind, have an integer kind, - // or be an encoding.TextUnmarshaler. - switch t.Key().Kind() { - case reflect.String, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - default: - if !reflect.PointerTo(t.Key()).Implements(textUnmarshalerType) { - d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) - d.skip() - return nil - } - } - if v.IsNil() { - v.Set(reflect.MakeMap(t)) - } - case reflect.Struct: - fields = cachedTypeFields(t) - // ok - default: - d.saveError(&UnmarshalTypeError{Value: "object", Type: t, Offset: int64(d.off)}) - d.skip() - return nil - } - - var mapElem reflect.Value - var origErrorContext errorContext - if d.errorContext != nil { - origErrorContext = *d.errorContext - } - - for { - // Read opening " of string key or closing }. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndObject { - // closing } - can only happen on first iteration. - break - } - if d.opcode != scanBeginLiteral { - panic(phasePanicMsg) - } - - // Read key. - start := d.readIndex() - d.rescanLiteral() - item := d.data[start:d.readIndex()] - key, ok := unquoteBytes(item) - if !ok { - panic(phasePanicMsg) - } - - // Figure out field corresponding to key. - var subv reflect.Value - destring := false // whether the value is wrapped in a string to be decoded first - - if v.Kind() == reflect.Map { - elemType := t.Elem() - if !mapElem.IsValid() { - mapElem = reflect.New(elemType).Elem() - } else { - mapElem.SetZero() - } - subv = mapElem - } else { - f := fields.byExactName[string(key)] - if f == nil { - f = fields.byFoldedName[string(foldName(key))] - } - if f != nil { - subv = v - destring = f.quoted - if d.errorContext == nil { - d.errorContext = new(errorContext) - } - for i, ind := range f.index { - if subv.Kind() == reflect.Pointer { - if subv.IsNil() { - // If a struct embeds a pointer to an unexported type, - // it is not possible to set a newly allocated value - // since the field is unexported. - // - // See https://golang.org/issue/21357 - if !subv.CanSet() { - d.saveError(fmt.Errorf("json: cannot set embedded pointer to unexported struct: %v", subv.Type().Elem())) - // Invalidate subv to ensure d.value(subv) skips over - // the JSON value without assigning it to subv. - subv = reflect.Value{} - destring = false - break - } - subv.Set(reflect.New(subv.Type().Elem())) - } - subv = subv.Elem() - } - if i < len(f.index)-1 { - d.errorContext.FieldStack = append( - d.errorContext.FieldStack, - subv.Type().Field(ind).Name, - ) - } - subv = subv.Field(ind) - } - d.errorContext.Struct = t - d.errorContext.FieldStack = append(d.errorContext.FieldStack, f.name) - } else if d.disallowUnknownFields { - d.saveError(fmt.Errorf("json: unknown field %q", key)) - } - } - - // Read : before value. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode != scanObjectKey { - panic(phasePanicMsg) - } - d.scanWhile(scanSkipSpace) - - if destring { - switch qv := d.valueQuoted().(type) { - case nil: - if err := d.literalStore(nullLiteral, subv, false); err != nil { - return err - } - case string: - if err := d.literalStore([]byte(qv), subv, true); err != nil { - return err - } - default: - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal unquoted value into %v", subv.Type())) - } - } else { - if err := d.value(subv); err != nil { - return err - } - } - - // Write value back to map; - // if using struct, subv points into struct already. - if v.Kind() == reflect.Map { - kt := t.Key() - var kv reflect.Value - if reflect.PointerTo(kt).Implements(textUnmarshalerType) { - kv = reflect.New(kt) - if err := d.literalStore(item, kv, true); err != nil { - return err - } - kv = kv.Elem() - } else { - switch kt.Kind() { - case reflect.String: - kv = reflect.New(kt).Elem() - kv.SetString(string(key)) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - s := string(key) - n, err := strconv.ParseInt(s, 10, 64) - if err != nil || kt.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) - break - } - kv = reflect.New(kt).Elem() - kv.SetInt(n) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - s := string(key) - n, err := strconv.ParseUint(s, 10, 64) - if err != nil || kt.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + s, Type: kt, Offset: int64(start + 1)}) - break - } - kv = reflect.New(kt).Elem() - kv.SetUint(n) - default: - panic("json: Unexpected key type") // should never occur - } - } - if kv.IsValid() { - v.SetMapIndex(kv, subv) - } - } - - // Next token must be , or }. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.errorContext != nil { - // Reset errorContext to its original state. - // Keep the same underlying array for FieldStack, to reuse the - // space and avoid unnecessary allocs. - d.errorContext.FieldStack = d.errorContext.FieldStack[:len(origErrorContext.FieldStack)] - d.errorContext.Struct = origErrorContext.Struct - } - if d.opcode == scanEndObject { - break - } - if d.opcode != scanObjectValue { - panic(phasePanicMsg) - } - } - return nil -} - -// convertNumber converts the number literal s to a float64 or a Number -// depending on the setting of d.useNumber. -func (d *decodeState) convertNumber(s string) (any, error) { - if d.useNumber { - return Number(s), nil - } - f, err := strconv.ParseFloat(s, 64) +// UnmarshalJSONV2 implements [jsonv2.UnmarshalerV2]. +func (n *Number) UnmarshalJSONV2(dec *jsontext.Decoder, opts jsonv2.Options) error { + val, err := dec.ReadValue() if err != nil { - return nil, &UnmarshalTypeError{Value: "number " + s, Type: reflect.TypeFor[float64](), Offset: int64(d.off)} + return err } - return f, nil -} - -var numberType = reflect.TypeFor[Number]() - -// literalStore decodes a literal stored in item into v. -// -// fromQuoted indicates whether this literal came from unwrapping a -// string from the ",string" struct tag option. this is used only to -// produce more helpful error messages. -func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool) error { - // Check for unmarshaler. - if len(item) == 0 { - // Empty string given. - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) + k := val.Kind() + switch k { + case 'n': + *n = "" // TODO: Should we merge with legacy semantics? return nil - } - isNull := item[0] == 'n' // null - u, ut, pv := indirect(v, isNull) - if u != nil { - return u.UnmarshalJSON(item) - } - if ut != nil { - if item[0] != '"' { - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - return nil - } - val := "number" - switch item[0] { - case 'n': - val = "null" - case 't', 'f': - val = "bool" - } - d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())}) - return nil - } - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - return ut.UnmarshalText(s) - } - - v = pv - - switch c := item[0]; c { - case 'n': // null - // The main parser checks that only true and false can reach here, - // but if this was a quoted string input, it could be anything. - if fromQuoted && string(item) != "null" { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - break - } - switch v.Kind() { - case reflect.Interface, reflect.Pointer, reflect.Map, reflect.Slice: - v.SetZero() - // otherwise, ignore null for primitives/string - } - case 't', 'f': // true, false - value := item[0] == 't' - // The main parser checks that only true and false can reach here, - // but if this was a quoted string input, it could be anything. - if fromQuoted && string(item) != "true" && string(item) != "false" { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - break - } - switch v.Kind() { - default: - if fromQuoted { - d.saveError(fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())) - } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) - } - case reflect.Bool: - v.SetBool(value) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(value)) - } else { - d.saveError(&UnmarshalTypeError{Value: "bool", Type: v.Type(), Offset: int64(d.readIndex())}) - } - } - - case '"': // string - s, ok := unquoteBytes(item) - if !ok { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - switch v.Kind() { - default: - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - case reflect.Slice: - if v.Type().Elem().Kind() != reflect.Uint8 { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - b := make([]byte, base64.StdEncoding.DecodedLen(len(s))) - n, err := base64.StdEncoding.Decode(b, s) - if err != nil { - d.saveError(err) - break - } - v.SetBytes(b[:n]) - case reflect.String: - t := string(s) - if v.Type() == numberType && !isValidNumber(t) { - return fmt.Errorf("json: invalid number literal, trying to unmarshal %q into Number", item) - } - v.SetString(t) - case reflect.Interface: - if v.NumMethod() == 0 { - v.Set(reflect.ValueOf(string(s))) - } else { - d.saveError(&UnmarshalTypeError{Value: "string", Type: v.Type(), Offset: int64(d.readIndex())}) - } - } - - default: // number - if c != '-' && (c < '0' || c > '9') { - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - panic(phasePanicMsg) - } - switch v.Kind() { - default: - if v.Kind() == reflect.String && v.Type() == numberType { - // s must be a valid number, because it's - // already been tokenized. - v.SetString(string(item)) - break - } - if fromQuoted { - return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type()) - } - d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) - case reflect.Interface: - n, err := d.convertNumber(string(item)) - if err != nil { - d.saveError(err) - break - } - if v.NumMethod() != 0 { - d.saveError(&UnmarshalTypeError{Value: "number", Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.Set(reflect.ValueOf(n)) - - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - n, err := strconv.ParseInt(string(item), 10, 64) - if err != nil || v.OverflowInt(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetInt(n) - - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - n, err := strconv.ParseUint(string(item), 10, 64) - if err != nil || v.OverflowUint(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetUint(n) - - case reflect.Float32, reflect.Float64: - n, err := strconv.ParseFloat(string(item), v.Type().Bits()) - if err != nil || v.OverflowFloat(n) { - d.saveError(&UnmarshalTypeError{Value: "number " + string(item), Type: v.Type(), Offset: int64(d.readIndex())}) - break - } - v.SetFloat(n) - } - } - return nil -} - -// The xxxInterface routines build up a value to be stored -// in an empty interface. They are not strictly necessary, -// but they avoid the weight of reflection in this common case. - -// valueInterface is like value but returns any. -func (d *decodeState) valueInterface() (val any) { - switch d.opcode { - default: - panic(phasePanicMsg) - case scanBeginArray: - val = d.arrayInterface() - d.scanNext() - case scanBeginObject: - val = d.objectInterface() - d.scanNext() - case scanBeginLiteral: - val = d.literalInterface() - } - return -} - -// arrayInterface is like array but returns []any. -func (d *decodeState) arrayInterface() []any { - var v = make([]any, 0) - for { - // Look ahead for ] - can only happen on first iteration. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndArray { + case '"': + if stringify, _ := jsonv2.GetOption(opts, jsonv2.StringifyNumbers); !stringify { break } - - v = append(v, d.valueInterface()) - - // Next token must be , or ]. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndArray { - break - } - if d.opcode != scanArrayValue { - panic(phasePanicMsg) - } - } - return v -} - -// objectInterface is like object but returns map[string]any. -func (d *decodeState) objectInterface() map[string]any { - m := make(map[string]any) - for { - // Read opening " of string key or closing }. - d.scanWhile(scanSkipSpace) - if d.opcode == scanEndObject { - // closing } - can only happen on first iteration. - break - } - if d.opcode != scanBeginLiteral { - panic(phasePanicMsg) - } - - // Read string key. - start := d.readIndex() - d.rescanLiteral() - item := d.data[start:d.readIndex()] - key, ok := unquote(item) - if !ok { - panic(phasePanicMsg) - } - - // Read : before value. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode != scanObjectKey { - panic(phasePanicMsg) + verbatim := jsonwire.ConsumeSimpleString(val) == len(val) + val = jsonwire.UnquoteMayCopy(val, verbatim) + if n, err := jsonwire.ConsumeNumber(val); n != len(val) || err != nil { + return fmt.Errorf("cannot parse %q as JSON number: %w", val, strconv.ErrSyntax) } - d.scanWhile(scanSkipSpace) - - // Read value. - m[key] = d.valueInterface() - - // Next token must be , or }. - if d.opcode == scanSkipSpace { - d.scanWhile(scanSkipSpace) - } - if d.opcode == scanEndObject { - break - } - if d.opcode != scanObjectValue { - panic(phasePanicMsg) - } - } - return m -} - -// literalInterface consumes and returns a literal from d.data[d.off-1:] and -// it reads the following byte ahead. The first byte of the literal has been -// read already (that's how the caller knows it's a literal). -func (d *decodeState) literalInterface() any { - // All bytes inside literal return scanContinue op code. - start := d.readIndex() - d.rescanLiteral() - - item := d.data[start:d.readIndex()] - - switch c := item[0]; c { - case 'n': // null + fallthrough + case '0': + *n = Number(val) return nil - - case 't', 'f': // true, false - return c == 't' - - case '"': // string - s, ok := unquote(item) - if !ok { - panic(phasePanicMsg) - } - return s - - default: // number - if c != '-' && (c < '0' || c > '9') { - panic(phasePanicMsg) - } - n, err := d.convertNumber(string(item)) - if err != nil { - d.saveError(err) - } - return n - } -} - -// getu4 decodes \uXXXX from the beginning of s, returning the hex value, -// or it returns -1. -func getu4(s []byte) rune { - if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { - return -1 - } - var r rune - for _, c := range s[2:6] { - switch { - case '0' <= c && c <= '9': - c = c - '0' - case 'a' <= c && c <= 'f': - c = c - 'a' + 10 - case 'A' <= c && c <= 'F': - c = c - 'A' + 10 - default: - return -1 - } - r = r*16 + rune(c) - } - return r -} - -// unquote converts a quoted JSON string literal s into an actual string t. -// The rules are different than for Go, so cannot use strconv.Unquote. -func unquote(s []byte) (t string, ok bool) { - s, ok = unquoteBytes(s) - t = string(s) - return -} - -// unquoteBytes should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname unquoteBytes -func unquoteBytes(s []byte) (t []byte, ok bool) { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { - return - } - s = s[1 : len(s)-1] - - // Check for unusual characters. If there are none, - // then no unquoting is needed, so return a slice of the - // original bytes. - r := 0 - for r < len(s) { - c := s[r] - if c == '\\' || c == '"' || c < ' ' { - break - } - if c < utf8.RuneSelf { - r++ - continue - } - rr, size := utf8.DecodeRune(s[r:]) - if rr == utf8.RuneError && size == 1 { - break - } - r += size - } - if r == len(s) { - return s, true - } - - b := make([]byte, len(s)+2*utf8.UTFMax) - w := copy(b, s[0:r]) - for r < len(s) { - // Out of room? Can only happen if s is full of - // malformed UTF-8 and we're replacing each - // byte with RuneError. - if w >= len(b)-2*utf8.UTFMax { - nb := make([]byte, (len(b)+utf8.UTFMax)*2) - copy(nb, b[0:w]) - b = nb - } - switch c := s[r]; { - case c == '\\': - r++ - if r >= len(s) { - return - } - switch s[r] { - default: - return - case '"', '\\', '/', '\'': - b[w] = s[r] - r++ - w++ - case 'b': - b[w] = '\b' - r++ - w++ - case 'f': - b[w] = '\f' - r++ - w++ - case 'n': - b[w] = '\n' - r++ - w++ - case 'r': - b[w] = '\r' - r++ - w++ - case 't': - b[w] = '\t' - r++ - w++ - case 'u': - r-- - rr := getu4(s[r:]) - if rr < 0 { - return - } - r += 6 - if utf16.IsSurrogate(rr) { - rr1 := getu4(s[r:]) - if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { - // A valid pair; consume. - r += 6 - w += utf8.EncodeRune(b[w:], dec) - break - } - // Invalid surrogate; fall back to replacement rune. - rr = unicode.ReplacementChar - } - w += utf8.EncodeRune(b[w:], rr) - } - - // Quote, control characters are invalid. - case c == '"', c < ' ': - return - - // ASCII - case c < utf8.RuneSelf: - b[w] = c - r++ - w++ - - // Coerce to well-formed UTF-8. - default: - rr, size := utf8.DecodeRune(s[r:]) - r += size - w += utf8.EncodeRune(b[w:], rr) - } } - return b[0:w], true + return &jsonv2.SemanticError{JSONKind: k, GoType: numberType} } diff --git a/v1/decode_test.go b/v1/decode_test.go index de09fae..a1a4e58 100644 --- a/v1/decode_test.go +++ b/v1/decode_test.go @@ -1107,6 +1107,7 @@ func TestMarshalInvalidUTF8(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) got, err := Marshal(tt.in) if string(got) != tt.want || err != nil { t.Errorf("%s: Marshal(%q):\n\tgot: (%q, %v)\n\twant: (%q, nil)", tt.Where, tt.in, got, err, tt.want) @@ -1128,6 +1129,7 @@ func TestMarshalNumberZeroVal(t *testing.T) { } func TestMarshalEmbeds(t *testing.T) { + skipKnownFailure(t) top := &Top{ Level0: 1, Embed0: Embed0{ @@ -1198,9 +1200,9 @@ func equalError(a, b error) bool { func TestUnmarshal(t *testing.T) { for _, tt := range unmarshalTests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) in := []byte(tt.in) - var scan scanner - if err := checkValid(in, &scan); err != nil { + if err := checkValid(in); err != nil { if !equalError(err, tt.err) { t.Fatalf("%s: checkValid error: %#v", tt.Where, err) } @@ -1401,6 +1403,7 @@ func TestErrorMessageFromMisusedString(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) r := strings.NewReader(tt.in) var s WrongString err := NewDecoder(r).Decode(&s) @@ -1777,6 +1780,7 @@ func TestEmptyString(t *testing.T) { // Test that a null for ,string is not replaced with the previous quoted string (issue 7046). // It should also not be an error (issue 2540, issue 8587). func TestNullString(t *testing.T) { + skipKnownFailure(t) type T struct { A int `json:",string"` B int `json:",string"` @@ -1832,6 +1836,7 @@ func TestInterfaceSet(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b := struct{ X any }{tt.pre} blob := `{"X":` + tt.json + `}` if err := Unmarshal([]byte(blob), &b); err != nil { @@ -1882,6 +1887,7 @@ type NullTest struct { // JSON null values should be ignored for primitives and string values instead of resulting in an error. // Issue 2540 func TestUnmarshalNulls(t *testing.T) { + skipKnownFailure(t) // Unmarshal docs: // The JSON null value unmarshals into an interface, map, pointer, or slice // by setting that Go value to nil. Because null is often used in JSON to mean @@ -2087,6 +2093,7 @@ func TestUnmarshalTypeError(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), tt.dest) if _, ok := err.(*UnmarshalTypeError); !ok { t.Errorf("%s: Unmarshal(%#q, %T):\n\tgot: %T\n\twant: %T", @@ -2113,6 +2120,7 @@ func TestUnmarshalSyntax(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), &x) if _, ok := err.(*SyntaxError); !ok { t.Errorf("%s: Unmarshal(%#q, any):\n\tgot: %T\n\twant: %T", @@ -2133,6 +2141,7 @@ type unexportedFields struct { } func TestUnmarshalUnexported(t *testing.T) { + skipKnownFailure(t) input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}, "s": [2, 3]}` want := &unexportedFields{Name: "Bob"} @@ -2228,6 +2237,7 @@ func TestPrefilled(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) ptrstr := fmt.Sprintf("%v", tt.ptr) err := Unmarshal([]byte(tt.in), tt.ptr) // tt.ptr edited here if err != nil { @@ -2257,6 +2267,7 @@ func TestInvalidUnmarshal(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) switch gotErr := Unmarshal([]byte(tt.in), tt.v); { case gotErr == nil: t.Fatalf("%s: Unmarshal error: got nil, want non-nil", tt.Where) @@ -2422,6 +2433,7 @@ func TestUnmarshalEmbeddedUnexported(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) err := Unmarshal([]byte(tt.in), tt.ptr) if !equalError(err, tt.err) { t.Errorf("%s: Unmarshal error:\n\tgot: %v\n\twant: %v", tt.Where, err, tt.err) @@ -2461,6 +2473,7 @@ func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) dec := NewDecoder(strings.NewReader(tt.in)) var err error for err == nil { @@ -2491,6 +2504,7 @@ func TestUnmarshalPanic(t *testing.T) { // The decoder used to hang if decoding into an interface pointing to its own address. // See golang.org/issues/31740. func TestUnmarshalRecursivePointer(t *testing.T) { + t.Skip("TODO: implement cycle detection in v2?") var v any v = &v data := []byte(`{"a": "b"}`) diff --git a/v1/encode.go b/v1/encode.go index d53e862..85a8186 100644 --- a/v1/encode.go +++ b/v1/encode.go @@ -11,20 +11,10 @@ package json import ( - "bytes" - "cmp" - "encoding" - "encoding/base64" - "fmt" - "math" "reflect" - "slices" "strconv" - "strings" - "sync" - "unicode" - "unicode/utf8" - _ "unsafe" // for linkname + + jsonv2 "github.com/go-json-experiment/json" ) // Marshal returns the JSON encoding of v. @@ -169,16 +159,7 @@ import ( // handle them. Passing cyclic structures to Marshal will result in // an error. func Marshal(v any) ([]byte, error) { - e := newEncodeState() - defer encodeStatePool.Put(e) - - err := e.marshal(v, encOpts{escapeHTML: true}) - if err != nil { - return nil, err - } - buf := append([]byte(nil), e.Bytes()...) - - return buf, nil + return jsonv2.Marshal(v, DefaultOptionsV1()) } // MarshalIndent is like [Marshal] but applies [Indent] to format the output. @@ -189,19 +170,16 @@ func MarshalIndent(v any, prefix, indent string) ([]byte, error) { if err != nil { return nil, err } - b2 := make([]byte, 0, indentGrowthFactor*len(b)) - b2, err = appendIndent(b2, b, prefix, indent) + b, err = appendIndent(nil, b, prefix, indent) if err != nil { return nil, err } - return b2, nil + return b, nil } // Marshaler is the interface implemented by types that // can marshal themselves into valid JSON. -type Marshaler interface { - MarshalJSON() ([]byte, error) -} +type Marshaler = jsonv2.MarshalerV1 // An UnsupportedTypeError is returned by [Marshal] when attempting // to encode an unsupported value type. @@ -258,1084 +236,3 @@ func (e *MarshalerError) Error() string { // Unwrap returns the underlying error. func (e *MarshalerError) Unwrap() error { return e.Err } - -const hex = "0123456789abcdef" - -// An encodeState encodes JSON into a bytes.Buffer. -type encodeState struct { - bytes.Buffer // accumulated output - - // Keep track of what pointers we've seen in the current recursive call - // path, to avoid cycles that could lead to a stack overflow. Only do - // the relatively expensive map operations if ptrLevel is larger than - // startDetectingCyclesAfter, so that we skip the work if we're within a - // reasonable amount of nested pointers deep. - ptrLevel uint - ptrSeen map[any]struct{} -} - -const startDetectingCyclesAfter = 1000 - -var encodeStatePool sync.Pool - -func newEncodeState() *encodeState { - if v := encodeStatePool.Get(); v != nil { - e := v.(*encodeState) - e.Reset() - if len(e.ptrSeen) > 0 { - panic("ptrEncoder.encode should have emptied ptrSeen via defers") - } - e.ptrLevel = 0 - return e - } - return &encodeState{ptrSeen: make(map[any]struct{})} -} - -// jsonError is an error wrapper type for internal use only. -// Panics with errors are wrapped in jsonError so that the top-level recover -// can distinguish intentional panics from this package. -type jsonError struct{ error } - -func (e *encodeState) marshal(v any, opts encOpts) (err error) { - defer func() { - if r := recover(); r != nil { - if je, ok := r.(jsonError); ok { - err = je.error - } else { - panic(r) - } - } - }() - e.reflectValue(reflect.ValueOf(v), opts) - return nil -} - -// error aborts the encoding by panicking with err wrapped in jsonError. -func (e *encodeState) error(err error) { - panic(jsonError{err}) -} - -func isEmptyValue(v reflect.Value) bool { - switch v.Kind() { - case reflect.Array, reflect.Map, reflect.Slice, reflect.String: - return v.Len() == 0 - case reflect.Bool, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, - reflect.Float32, reflect.Float64, - reflect.Interface, reflect.Pointer: - return v.IsZero() - } - return false -} - -func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) { - valueEncoder(v)(e, v, opts) -} - -type encOpts struct { - // quoted causes primitive fields to be encoded inside JSON strings. - quoted bool - // escapeHTML causes '<', '>', and '&' to be escaped in JSON strings. - escapeHTML bool -} - -type encoderFunc func(e *encodeState, v reflect.Value, opts encOpts) - -var encoderCache sync.Map // map[reflect.Type]encoderFunc - -func valueEncoder(v reflect.Value) encoderFunc { - if !v.IsValid() { - return invalidValueEncoder - } - return typeEncoder(v.Type()) -} - -func typeEncoder(t reflect.Type) encoderFunc { - if fi, ok := encoderCache.Load(t); ok { - return fi.(encoderFunc) - } - - // To deal with recursive types, populate the map with an - // indirect func before we build it. This type waits on the - // real func (f) to be ready and then calls it. This indirect - // func is only used for recursive types. - var ( - wg sync.WaitGroup - f encoderFunc - ) - wg.Add(1) - fi, loaded := encoderCache.LoadOrStore(t, encoderFunc(func(e *encodeState, v reflect.Value, opts encOpts) { - wg.Wait() - f(e, v, opts) - })) - if loaded { - return fi.(encoderFunc) - } - - // Compute the real encoder and replace the indirect func with it. - f = newTypeEncoder(t, true) - wg.Done() - encoderCache.Store(t, f) - return f -} - -var ( - marshalerType = reflect.TypeFor[Marshaler]() - textMarshalerType = reflect.TypeFor[encoding.TextMarshaler]() -) - -// newTypeEncoder constructs an encoderFunc for a type. -// The returned encoder only checks CanAddr when allowAddr is true. -func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc { - // If we have a non-pointer value whose type implements - // Marshaler with a value receiver, then we're better off taking - // the address of the value - otherwise we end up with an - // allocation as we cast the value to an interface. - if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(marshalerType) { - return newCondAddrEncoder(addrMarshalerEncoder, newTypeEncoder(t, false)) - } - if t.Implements(marshalerType) { - return marshalerEncoder - } - if t.Kind() != reflect.Pointer && allowAddr && reflect.PointerTo(t).Implements(textMarshalerType) { - return newCondAddrEncoder(addrTextMarshalerEncoder, newTypeEncoder(t, false)) - } - if t.Implements(textMarshalerType) { - return textMarshalerEncoder - } - - switch t.Kind() { - case reflect.Bool: - return boolEncoder - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return intEncoder - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return uintEncoder - case reflect.Float32: - return float32Encoder - case reflect.Float64: - return float64Encoder - case reflect.String: - return stringEncoder - case reflect.Interface: - return interfaceEncoder - case reflect.Struct: - return newStructEncoder(t) - case reflect.Map: - return newMapEncoder(t) - case reflect.Slice: - return newSliceEncoder(t) - case reflect.Array: - return newArrayEncoder(t) - case reflect.Pointer: - return newPtrEncoder(t) - default: - return unsupportedTypeEncoder - } -} - -func invalidValueEncoder(e *encodeState, v reflect.Value, _ encOpts) { - e.WriteString("null") -} - -func marshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Kind() == reflect.Pointer && v.IsNil() { - e.WriteString("null") - return - } - m, ok := v.Interface().(Marshaler) - if !ok { - e.WriteString("null") - return - } - b, err := m.MarshalJSON() - if err == nil { - e.Grow(len(b)) - out := e.AvailableBuffer() - out, err = appendCompact(out, b, opts.escapeHTML) - e.Buffer.Write(out) - } - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) - } -} - -func addrMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - va := v.Addr() - if va.IsNil() { - e.WriteString("null") - return - } - m := va.Interface().(Marshaler) - b, err := m.MarshalJSON() - if err == nil { - e.Grow(len(b)) - out := e.AvailableBuffer() - out, err = appendCompact(out, b, opts.escapeHTML) - e.Buffer.Write(out) - } - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalJSON"}) - } -} - -func textMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Kind() == reflect.Pointer && v.IsNil() { - e.WriteString("null") - return - } - m, ok := v.Interface().(encoding.TextMarshaler) - if !ok { - e.WriteString("null") - return - } - b, err := m.MarshalText() - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalText"}) - } - e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) -} - -func addrTextMarshalerEncoder(e *encodeState, v reflect.Value, opts encOpts) { - va := v.Addr() - if va.IsNil() { - e.WriteString("null") - return - } - m := va.Interface().(encoding.TextMarshaler) - b, err := m.MarshalText() - if err != nil { - e.error(&MarshalerError{v.Type(), err, "MarshalText"}) - } - e.Write(appendString(e.AvailableBuffer(), b, opts.escapeHTML)) -} - -func boolEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendBool(b, v.Bool()) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -func intEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendInt(b, v.Int(), 10) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -func uintEncoder(e *encodeState, v reflect.Value, opts encOpts) { - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = strconv.AppendUint(b, v.Uint(), 10) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -type floatEncoder int // number of bits - -func (bits floatEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - f := v.Float() - if math.IsInf(f, 0) || math.IsNaN(f) { - e.error(&UnsupportedValueError{v, strconv.FormatFloat(f, 'g', -1, int(bits))}) - } - - // Convert as if by ES6 number to string conversion. - // This matches most other JSON generators. - // See golang.org/issue/6384 and golang.org/issue/14135. - // Like fmt %g, but the exponent cutoffs are different - // and exponents themselves are not padded to two digits. - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - abs := math.Abs(f) - fmt := byte('f') - // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. - if abs != 0 { - if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) { - fmt = 'e' - } - } - b = strconv.AppendFloat(b, f, fmt, -1, int(bits)) - if fmt == 'e' { - // clean up e-09 to e-9 - n := len(b) - if n >= 4 && b[n-4] == 'e' && b[n-3] == '-' && b[n-2] == '0' { - b[n-2] = b[n-1] - b = b[:n-1] - } - } - b = mayAppendQuote(b, opts.quoted) - e.Write(b) -} - -var ( - float32Encoder = (floatEncoder(32)).encode - float64Encoder = (floatEncoder(64)).encode -) - -func stringEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.Type() == numberType { - numStr := v.String() - // In Go1.5 the empty string encodes to "0", while this is not a valid number literal - // we keep compatibility so check validity after this. - if numStr == "" { - numStr = "0" // Number's zero-val - } - if !isValidNumber(numStr) { - e.error(fmt.Errorf("json: invalid number literal %q", numStr)) - } - b := e.AvailableBuffer() - b = mayAppendQuote(b, opts.quoted) - b = append(b, numStr...) - b = mayAppendQuote(b, opts.quoted) - e.Write(b) - return - } - if opts.quoted { - b := appendString(nil, v.String(), opts.escapeHTML) - e.Write(appendString(e.AvailableBuffer(), b, false)) // no need to escape again since it is already escaped - } else { - e.Write(appendString(e.AvailableBuffer(), v.String(), opts.escapeHTML)) - } -} - -// isValidNumber reports whether s is a valid JSON number literal. -// -// isValidNumber should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname isValidNumber -func isValidNumber(s string) bool { - // This function implements the JSON numbers grammar. - // See https://tools.ietf.org/html/rfc7159#section-6 - // and https://www.json.org/img/number.png - - if s == "" { - return false - } - - // Optional - - if s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - - // Digits - switch { - default: - return false - - case s[0] == '0': - s = s[1:] - - case '1' <= s[0] && s[0] <= '9': - s = s[1:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // . followed by 1 or more digits. - if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { - s = s[2:] - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // e or E followed by an optional - or + and - // 1 or more digits. - if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') { - s = s[1:] - if s[0] == '+' || s[0] == '-' { - s = s[1:] - if s == "" { - return false - } - } - for len(s) > 0 && '0' <= s[0] && s[0] <= '9' { - s = s[1:] - } - } - - // Make sure we are at the end. - return s == "" -} - -func interfaceEncoder(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - e.reflectValue(v.Elem(), opts) -} - -func unsupportedTypeEncoder(e *encodeState, v reflect.Value, _ encOpts) { - e.error(&UnsupportedTypeError{v.Type()}) -} - -type structEncoder struct { - fields structFields -} - -type structFields struct { - list []field - byExactName map[string]*field - byFoldedName map[string]*field -} - -func (se structEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - next := byte('{') -FieldLoop: - for i := range se.fields.list { - f := &se.fields.list[i] - - // Find the nested struct field by following f.index. - fv := v - for _, i := range f.index { - if fv.Kind() == reflect.Pointer { - if fv.IsNil() { - continue FieldLoop - } - fv = fv.Elem() - } - fv = fv.Field(i) - } - - if (f.omitEmpty && isEmptyValue(fv)) || - (f.omitZero && (f.isZero == nil && fv.IsZero() || (f.isZero != nil && f.isZero(fv)))) { - continue - } - e.WriteByte(next) - next = ',' - if opts.escapeHTML { - e.WriteString(f.nameEscHTML) - } else { - e.WriteString(f.nameNonEsc) - } - opts.quoted = f.quoted - f.encoder(e, fv, opts) - } - if next == '{' { - e.WriteString("{}") - } else { - e.WriteByte('}') - } -} - -func newStructEncoder(t reflect.Type) encoderFunc { - se := structEncoder{fields: cachedTypeFields(t)} - return se.encode -} - -type mapEncoder struct { - elemEnc encoderFunc -} - -func (me mapEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - ptr := v.UnsafePointer() - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - e.WriteByte('{') - - // Extract and sort the keys. - var ( - sv = make([]reflectWithString, v.Len()) - mi = v.MapRange() - err error - ) - for i := 0; mi.Next(); i++ { - if sv[i].ks, err = resolveKeyName(mi.Key()); err != nil { - e.error(fmt.Errorf("json: encoding error for type %q: %q", v.Type().String(), err.Error())) - } - sv[i].v = mi.Value() - } - slices.SortFunc(sv, func(i, j reflectWithString) int { - return strings.Compare(i.ks, j.ks) - }) - - for i, kv := range sv { - if i > 0 { - e.WriteByte(',') - } - e.Write(appendString(e.AvailableBuffer(), kv.ks, opts.escapeHTML)) - e.WriteByte(':') - me.elemEnc(e, kv.v, opts) - } - e.WriteByte('}') - e.ptrLevel-- -} - -func newMapEncoder(t reflect.Type) encoderFunc { - switch t.Key().Kind() { - case reflect.String, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - default: - if !t.Key().Implements(textMarshalerType) { - return unsupportedTypeEncoder - } - } - me := mapEncoder{typeEncoder(t.Elem())} - return me.encode -} - -func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - - s := v.Bytes() - b := e.AvailableBuffer() - b = append(b, '"') - b = base64.StdEncoding.AppendEncode(b, s) - b = append(b, '"') - e.Write(b) -} - -// sliceEncoder just wraps an arrayEncoder, checking to make sure the value isn't nil. -type sliceEncoder struct { - arrayEnc encoderFunc -} - -func (se sliceEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - // Here we use a struct to memorize the pointer to the first element of the slice - // and its length. - ptr := struct { - ptr any // always an unsafe.Pointer, but avoids a dependency on package unsafe - len int - }{v.UnsafePointer(), v.Len()} - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - se.arrayEnc(e, v, opts) - e.ptrLevel-- -} - -func newSliceEncoder(t reflect.Type) encoderFunc { - // Byte slices get special treatment; arrays don't. - if t.Elem().Kind() == reflect.Uint8 { - p := reflect.PointerTo(t.Elem()) - if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) { - return encodeByteSlice - } - } - enc := sliceEncoder{newArrayEncoder(t)} - return enc.encode -} - -type arrayEncoder struct { - elemEnc encoderFunc -} - -func (ae arrayEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - e.WriteByte('[') - n := v.Len() - for i := 0; i < n; i++ { - if i > 0 { - e.WriteByte(',') - } - ae.elemEnc(e, v.Index(i), opts) - } - e.WriteByte(']') -} - -func newArrayEncoder(t reflect.Type) encoderFunc { - enc := arrayEncoder{typeEncoder(t.Elem())} - return enc.encode -} - -type ptrEncoder struct { - elemEnc encoderFunc -} - -func (pe ptrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.IsNil() { - e.WriteString("null") - return - } - if e.ptrLevel++; e.ptrLevel > startDetectingCyclesAfter { - // We're a large number of nested ptrEncoder.encode calls deep; - // start checking if we've run into a pointer cycle. - ptr := v.Interface() - if _, ok := e.ptrSeen[ptr]; ok { - e.error(&UnsupportedValueError{v, fmt.Sprintf("encountered a cycle via %s", v.Type())}) - } - e.ptrSeen[ptr] = struct{}{} - defer delete(e.ptrSeen, ptr) - } - pe.elemEnc(e, v.Elem(), opts) - e.ptrLevel-- -} - -func newPtrEncoder(t reflect.Type) encoderFunc { - enc := ptrEncoder{typeEncoder(t.Elem())} - return enc.encode -} - -type condAddrEncoder struct { - canAddrEnc, elseEnc encoderFunc -} - -func (ce condAddrEncoder) encode(e *encodeState, v reflect.Value, opts encOpts) { - if v.CanAddr() { - ce.canAddrEnc(e, v, opts) - } else { - ce.elseEnc(e, v, opts) - } -} - -// newCondAddrEncoder returns an encoder that checks whether its value -// CanAddr and delegates to canAddrEnc if so, else to elseEnc. -func newCondAddrEncoder(canAddrEnc, elseEnc encoderFunc) encoderFunc { - enc := condAddrEncoder{canAddrEnc: canAddrEnc, elseEnc: elseEnc} - return enc.encode -} - -func isValidTag(s string) bool { - if s == "" { - return false - } - for _, c := range s { - switch { - case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c): - // Backslash and quote chars are reserved, but - // otherwise any punctuation chars are allowed - // in a tag name. - case !unicode.IsLetter(c) && !unicode.IsDigit(c): - return false - } - } - return true -} - -func typeByIndex(t reflect.Type, index []int) reflect.Type { - for _, i := range index { - if t.Kind() == reflect.Pointer { - t = t.Elem() - } - t = t.Field(i).Type - } - return t -} - -type reflectWithString struct { - v reflect.Value - ks string -} - -func resolveKeyName(k reflect.Value) (string, error) { - if k.Kind() == reflect.String { - return k.String(), nil - } - if tm, ok := k.Interface().(encoding.TextMarshaler); ok { - if k.Kind() == reflect.Pointer && k.IsNil() { - return "", nil - } - buf, err := tm.MarshalText() - return string(buf), err - } - switch k.Kind() { - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - return strconv.FormatInt(k.Int(), 10), nil - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - return strconv.FormatUint(k.Uint(), 10), nil - } - panic("unexpected map key type") -} - -func appendString[Bytes []byte | string](dst []byte, src Bytes, escapeHTML bool) []byte { - dst = append(dst, '"') - start := 0 - for i := 0; i < len(src); { - if b := src[i]; b < utf8.RuneSelf { - if htmlSafeSet[b] || (!escapeHTML && safeSet[b]) { - i++ - continue - } - dst = append(dst, src[start:i]...) - switch b { - case '\\', '"': - dst = append(dst, '\\', b) - case '\b': - dst = append(dst, '\\', 'b') - case '\f': - dst = append(dst, '\\', 'f') - case '\n': - dst = append(dst, '\\', 'n') - case '\r': - dst = append(dst, '\\', 'r') - case '\t': - dst = append(dst, '\\', 't') - default: - // This encodes bytes < 0x20 except for \b, \f, \n, \r and \t. - // If escapeHTML is set, it also escapes <, >, and & - // because they can lead to security holes when - // user-controlled strings are rendered into JSON - // and served to some browsers. - dst = append(dst, '\\', 'u', '0', '0', hex[b>>4], hex[b&0xF]) - } - i++ - start = i - continue - } - // TODO(https://go.dev/issue/56948): Use generic utf8 functionality. - // For now, cast only a small portion of byte slices to a string - // so that it can be stack allocated. This slows down []byte slightly - // due to the extra copy, but keeps string performance roughly the same. - n := len(src) - i - if n > utf8.UTFMax { - n = utf8.UTFMax - } - c, size := utf8.DecodeRuneInString(string(src[i : i+n])) - if c == utf8.RuneError && size == 1 { - dst = append(dst, src[start:i]...) - dst = append(dst, `\ufffd`...) - i += size - start = i - continue - } - // U+2028 is LINE SEPARATOR. - // U+2029 is PARAGRAPH SEPARATOR. - // They are both technically valid characters in JSON strings, - // but don't work in JSONP, which has to be evaluated as JavaScript, - // and can lead to security holes there. It is valid JSON to - // escape them, so we do so unconditionally. - // See https://en.wikipedia.org/wiki/JSON#Safety. - if c == '\u2028' || c == '\u2029' { - dst = append(dst, src[start:i]...) - dst = append(dst, '\\', 'u', '2', '0', '2', hex[c&0xF]) - i += size - start = i - continue - } - i += size - } - dst = append(dst, src[start:]...) - dst = append(dst, '"') - return dst -} - -// A field represents a single field found in a struct. -type field struct { - name string - nameBytes []byte // []byte(name) - - nameNonEsc string // `"` + name + `":` - nameEscHTML string // `"` + HTMLEscape(name) + `":` - - tag bool - index []int - typ reflect.Type - omitEmpty bool - omitZero bool - isZero func(reflect.Value) bool - quoted bool - - encoder encoderFunc -} - -type isZeroer interface { - IsZero() bool -} - -var isZeroerType = reflect.TypeFor[isZeroer]() - -// typeFields returns a list of fields that JSON should recognize for the given type. -// The algorithm is breadth-first search over the set of structs to include - the top struct -// and then any reachable anonymous structs. -// -// typeFields should be an internal detail, -// but widely used packages access it using linkname. -// Notable members of the hall of shame include: -// - github.com/bytedance/sonic -// -// Do not remove or change the type signature. -// See go.dev/issue/67401. -// -//go:linkname typeFields -func typeFields(t reflect.Type) structFields { - // Anonymous fields to explore at the current level and the next. - current := []field{} - next := []field{{typ: t}} - - // Count of queued names for current level and the next. - var count, nextCount map[reflect.Type]int - - // Types already visited at an earlier level. - visited := map[reflect.Type]bool{} - - // Fields found. - var fields []field - - // Buffer to run appendHTMLEscape on field names. - var nameEscBuf []byte - - for len(next) > 0 { - current, next = next, current[:0] - count, nextCount = nextCount, map[reflect.Type]int{} - - for _, f := range current { - if visited[f.typ] { - continue - } - visited[f.typ] = true - - // Scan f.typ for fields to include. - for i := 0; i < f.typ.NumField(); i++ { - sf := f.typ.Field(i) - if sf.Anonymous { - t := sf.Type - if t.Kind() == reflect.Pointer { - t = t.Elem() - } - if !sf.IsExported() && t.Kind() != reflect.Struct { - // Ignore embedded fields of unexported non-struct types. - continue - } - // Do not ignore embedded fields of unexported struct types - // since they may have exported fields. - } else if !sf.IsExported() { - // Ignore unexported non-embedded fields. - continue - } - tag := sf.Tag.Get("json") - if tag == "-" { - continue - } - name, opts := parseTag(tag) - if !isValidTag(name) { - name = "" - } - index := make([]int, len(f.index)+1) - copy(index, f.index) - index[len(f.index)] = i - - ft := sf.Type - if ft.Name() == "" && ft.Kind() == reflect.Pointer { - // Follow pointer. - ft = ft.Elem() - } - - // Only strings, floats, integers, and booleans can be quoted. - quoted := false - if opts.Contains("string") { - switch ft.Kind() { - case reflect.Bool, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, - reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, - reflect.Float32, reflect.Float64, - reflect.String: - quoted = true - } - } - - // Record found field and index sequence. - if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct { - tagged := name != "" - if name == "" { - name = sf.Name - } - field := field{ - name: name, - tag: tagged, - index: index, - typ: ft, - omitEmpty: opts.Contains("omitempty"), - omitZero: opts.Contains("omitzero"), - quoted: quoted, - } - field.nameBytes = []byte(field.name) - - // Build nameEscHTML and nameNonEsc ahead of time. - nameEscBuf = appendHTMLEscape(nameEscBuf[:0], field.nameBytes) - field.nameEscHTML = `"` + string(nameEscBuf) + `":` - field.nameNonEsc = `"` + field.name + `":` - - if field.omitZero { - t := sf.Type - // Provide a function that uses a type's IsZero method. - switch { - case t.Kind() == reflect.Interface && t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - // Avoid panics calling IsZero on a nil interface or - // non-nil interface with nil pointer. - return v.IsNil() || - (v.Elem().Kind() == reflect.Pointer && v.Elem().IsNil()) || - v.Interface().(isZeroer).IsZero() - } - case t.Kind() == reflect.Pointer && t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - // Avoid panics calling IsZero on nil pointer. - return v.IsNil() || v.Interface().(isZeroer).IsZero() - } - case t.Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - return v.Interface().(isZeroer).IsZero() - } - case reflect.PointerTo(t).Implements(isZeroerType): - field.isZero = func(v reflect.Value) bool { - if !v.CanAddr() { - // Temporarily box v so we can take the address. - v2 := reflect.New(v.Type()).Elem() - v2.Set(v) - v = v2 - } - return v.Addr().Interface().(isZeroer).IsZero() - } - } - } - - fields = append(fields, field) - if count[f.typ] > 1 { - // If there were multiple instances, add a second, - // so that the annihilation code will see a duplicate. - // It only cares about the distinction between 1 and 2, - // so don't bother generating any more copies. - fields = append(fields, fields[len(fields)-1]) - } - continue - } - - // Record new anonymous struct to explore in next round. - nextCount[ft]++ - if nextCount[ft] == 1 { - next = append(next, field{name: ft.Name(), index: index, typ: ft}) - } - } - } - } - - slices.SortFunc(fields, func(a, b field) int { - // sort field by name, breaking ties with depth, then - // breaking ties with "name came from json tag", then - // breaking ties with index sequence. - if c := strings.Compare(a.name, b.name); c != 0 { - return c - } - if c := cmp.Compare(len(a.index), len(b.index)); c != 0 { - return c - } - if a.tag != b.tag { - if a.tag { - return -1 - } - return +1 - } - return slices.Compare(a.index, b.index) - }) - - // Delete all fields that are hidden by the Go rules for embedded fields, - // except that fields with JSON tags are promoted. - - // The fields are sorted in primary order of name, secondary order - // of field index length. Loop over names; for each name, delete - // hidden fields by choosing the one dominant field that survives. - out := fields[:0] - for advance, i := 0, 0; i < len(fields); i += advance { - // One iteration per name. - // Find the sequence of fields with the name of this first field. - fi := fields[i] - name := fi.name - for advance = 1; i+advance < len(fields); advance++ { - fj := fields[i+advance] - if fj.name != name { - break - } - } - if advance == 1 { // Only one field with this name - out = append(out, fi) - continue - } - dominant, ok := dominantField(fields[i : i+advance]) - if ok { - out = append(out, dominant) - } - } - - fields = out - slices.SortFunc(fields, func(i, j field) int { - return slices.Compare(i.index, j.index) - }) - - for i := range fields { - f := &fields[i] - f.encoder = typeEncoder(typeByIndex(t, f.index)) - } - exactNameIndex := make(map[string]*field, len(fields)) - foldedNameIndex := make(map[string]*field, len(fields)) - for i, field := range fields { - exactNameIndex[field.name] = &fields[i] - // For historical reasons, first folded match takes precedence. - if _, ok := foldedNameIndex[string(foldName(field.nameBytes))]; !ok { - foldedNameIndex[string(foldName(field.nameBytes))] = &fields[i] - } - } - return structFields{fields, exactNameIndex, foldedNameIndex} -} - -// dominantField looks through the fields, all of which are known to -// have the same name, to find the single field that dominates the -// others using Go's embedding rules, modified by the presence of -// JSON tags. If there are multiple top-level fields, the boolean -// will be false: This condition is an error in Go and we skip all -// the fields. -func dominantField(fields []field) (field, bool) { - // The fields are sorted in increasing index-length order, then by presence of tag. - // That means that the first field is the dominant one. We need only check - // for error cases: two fields at top level, either both tagged or neither tagged. - if len(fields) > 1 && len(fields[0].index) == len(fields[1].index) && fields[0].tag == fields[1].tag { - return field{}, false - } - return fields[0], true -} - -var fieldCache sync.Map // map[reflect.Type]structFields - -// cachedTypeFields is like typeFields but uses a cache to avoid repeated work. -func cachedTypeFields(t reflect.Type) structFields { - if f, ok := fieldCache.Load(t); ok { - return f.(structFields) - } - f, _ := fieldCache.LoadOrStore(t, typeFields(t)) - return f.(structFields) -} - -func mayAppendQuote(b []byte, quoted bool) []byte { - if quoted { - b = append(b, '"') - } - return b -} diff --git a/v1/encode_test.go b/v1/encode_test.go index 79c4817..3ed847f 100644 --- a/v1/encode_test.go +++ b/v1/encode_test.go @@ -85,6 +85,10 @@ func (nps *NoPanicStruct) IsZero() bool { return nps.Int != 0 } +type isZeroer interface { + IsZero() bool +} + type OptionalsZero struct { Sr string `json:"sr"` So string `json:"so,omitzero"` @@ -324,6 +328,7 @@ type renamedByteSlice []byte type renamedRenamedByteSlice []renamedByte func TestEncodeRenamedByteSlice(t *testing.T) { + skipKnownFailure(t) s := renamedByteSlice("abc") got, err := Marshal(s) if err != nil { @@ -380,6 +385,7 @@ func init() { mapCycle["x"] = mapCycle sliceCycle[0] = sliceCycle sliceNoCycle[1] = sliceNoCycle[:1] + const startDetectingCyclesAfter = 1e3 for i := startDetectingCyclesAfter; i > 0; i-- { sliceNoCycle = []any{sliceNoCycle} } @@ -414,6 +420,7 @@ func TestUnsupportedValues(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) if _, err := Marshal(tt.in); err != nil { if _, ok := err.(*UnsupportedValueError); !ok { t.Errorf("%s: Marshal error:\n\tgot: %T\n\twant: %T", tt.Where, err, new(UnsupportedValueError)) @@ -711,6 +718,7 @@ func TestAnonymousFields(t *testing.T) { for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b, err := Marshal(tt.makeInput()) if err != nil { t.Fatalf("%s: Marshal error: %v", tt.Where, err) @@ -789,6 +797,7 @@ func TestNilMarshal(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) switch got, err := Marshal(tt.in); { case err != nil: t.Fatalf("%s: Marshal error: %v", tt.Where, err) @@ -1094,6 +1103,7 @@ func TestTextMarshalerMapKeysAreSorted(t *testing.T) { // https://golang.org/issue/33675 func TestNilMarshalerTextMapKey(t *testing.T) { + skipKnownFailure(t) got, err := Marshal(map[*unmarshalerText]int{ (*unmarshalerText)(nil): 1, {"A", "B"}: 2, @@ -1303,6 +1313,7 @@ func TestMarshalRawMessageValue(t *testing.T) { for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) b, err := Marshal(tt.in) if ok := (err == nil); ok != tt.ok { if err != nil { diff --git a/v1/failing.txt b/v1/failing.txt new file mode 100644 index 0000000..183df7a --- /dev/null +++ b/v1/failing.txt @@ -0,0 +1,184 @@ +TestMarshalInvalidUTF8 +TestMarshalInvalidUTF8/#00 +TestMarshalInvalidUTF8/#02 +TestMarshalInvalidUTF8/#03 +TestMarshalInvalidUTF8/#04 +TestMarshalInvalidUTF8/#05 +TestMarshalEmbeds +TestUnmarshal +TestUnmarshal/#07 +TestUnmarshal/#13 +TestUnmarshal/#14 +TestUnmarshal/#15 +TestUnmarshal/#16 +TestUnmarshal/#17 +TestUnmarshal/#18 +TestUnmarshal/#19 +TestUnmarshal/#21 +TestUnmarshal/#23 +TestUnmarshal/#30 +TestUnmarshal/#32 +TestUnmarshal/#35 +TestUnmarshal/#36 +TestUnmarshal/#37 +TestUnmarshal/#38 +TestUnmarshal/#39 +TestUnmarshal/#40 +TestUnmarshal/#41 +TestUnmarshal/#42 +TestUnmarshal/#43 +TestUnmarshal/#44 +TestUnmarshal/#45 +TestUnmarshal/#46 +TestUnmarshal/#47 +TestUnmarshal/#48 +TestUnmarshal/#52 +TestUnmarshal/#81 +TestUnmarshal/#82 +TestUnmarshal/#83 +TestUnmarshal/#84 +TestUnmarshal/#85 +TestUnmarshal/#86 +TestUnmarshal/#87 +TestUnmarshal/#90 +TestUnmarshal/#93 +TestUnmarshal/#95 +TestUnmarshal/#105 +TestUnmarshal/#106 +TestUnmarshal/#107 +TestUnmarshal/#109 +TestUnmarshal/#111 +TestUnmarshal/#113 +TestUnmarshal/#130 +TestUnmarshal/#131 +TestUnmarshal/#132 +TestUnmarshal/#135 +TestUnmarshal/#136 +TestUnmarshal/#137 +TestUnmarshal/#138 +TestUnmarshal/#139 +TestUnmarshal/#140 +TestUnmarshal/#141 +TestUnmarshal/#142 +TestUnmarshal/#143 +TestUnmarshal/#144 +TestUnmarshal/#145 +TestUnmarshal/#146 +TestUnmarshal/#147 +TestUnmarshal/#148 +TestUnmarshal/#149 +TestUnmarshal/#150 +TestUnmarshal/#151 +TestUnmarshal/#152 +TestUnmarshal/#153 +TestErrorMessageFromMisusedString +TestErrorMessageFromMisusedString/#00 +TestErrorMessageFromMisusedString/#01 +TestErrorMessageFromMisusedString/#02 +TestErrorMessageFromMisusedString/#03 +TestErrorMessageFromMisusedString/#04 +TestErrorMessageFromMisusedString/#05 +TestNullString +TestInterfaceSet +TestInterfaceSet/#01 +TestInterfaceSet/#02 +TestInterfaceSet/#07 +TestInterfaceSet/#10 +TestInterfaceSet/#11 +TestUnmarshalNulls +TestUnmarshalTypeError +TestUnmarshalTypeError/#00 +TestUnmarshalTypeError/#01 +TestUnmarshalTypeError/#02 +TestUnmarshalTypeError/#03 +TestUnmarshalTypeError/#04 +TestUnmarshalTypeError/#05 +TestUnmarshalSyntax +TestUnmarshalSyntax/#00 +TestUnmarshalSyntax/#01 +TestUnmarshalSyntax/#02 +TestUnmarshalSyntax/#03 +TestUnmarshalSyntax/#04 +TestUnmarshalSyntax/#05 +TestUnmarshalSyntax/#06 +TestUnmarshalSyntax/#07 +TestUnmarshalUnexported +TestPrefilled +TestPrefilled/#00 +TestPrefilled/#01 +TestInvalidUnmarshal +TestInvalidUnmarshal/#00 +TestInvalidUnmarshal/#01 +TestInvalidUnmarshal/#02 +TestInvalidUnmarshal/#03 +TestInvalidUnmarshal/#04 +TestInvalidUnmarshal/#05 +TestInvalidUnmarshal/#06 +TestUnmarshalEmbeddedUnexported +TestUnmarshalEmbeddedUnexported/#00 +TestUnmarshalEmbeddedUnexported/#01 +TestUnmarshalEmbeddedUnexported/#02 +TestUnmarshalEmbeddedUnexported/#03 +TestUnmarshalEmbeddedUnexported/#04 +TestUnmarshalEmbeddedUnexported/#05 +TestUnmarshalEmbeddedUnexported/#06 +TestUnmarshalEmbeddedUnexported/#07 +TestUnmarshalEmbeddedUnexported/#08 +TestUnmarshalErrorAfterMultipleJSON +TestUnmarshalErrorAfterMultipleJSON/#00 +TestUnmarshalErrorAfterMultipleJSON/#01 +TestUnmarshalErrorAfterMultipleJSON/#02 +TestUnmarshalErrorAfterMultipleJSON/#03 +TestUnmarshalErrorAfterMultipleJSON/#04 +TestEncodeRenamedByteSlice +TestUnsupportedValues +TestUnsupportedValues/#00 +TestUnsupportedValues/#01 +TestUnsupportedValues/#02 +TestUnsupportedValues/#03 +TestUnsupportedValues/#04 +TestUnsupportedValues/#05 +TestUnsupportedValues/#06 +TestUnsupportedValues/#07 +TestAnonymousFields +TestAnonymousFields/UnexportedEmbeddedInt +TestAnonymousFields/ExportedEmbeddedInt +TestAnonymousFields/UnexportedEmbeddedIntPointer +TestAnonymousFields/ExportedEmbeddedIntPointer +TestAnonymousFields/EmbeddedStruct +TestAnonymousFields/EmbeddedStructPointer +TestAnonymousFields/NestedStructAndInts +TestNilMarshal +TestNilMarshal/#08 +TestNilMarshal/#11 +TestNilMarshalerTextMapKey +TestMarshalRawMessageValue +TestMarshalRawMessageValue/#20 +TestMarshalRawMessageValue/#21 +TestMarshalRawMessageValue/#22 +TestMarshalRawMessageValue/#23 +TestMarshalRawMessageValue/#24 +TestMarshalRawMessageValue/#25 +TestMarshalRawMessageValue/#26 +TestMarshalRawMessageValue/#27 +TestMarshalRawMessageValue/#28 +TestMarshalRawMessageValue/#29 +TestMarshalRawMessageValue/#30 +TestMarshalRawMessageValue/#31 +TestMarshalRawMessageValue/#33 +TestMarshalRawMessageValue/#35 +TestIndentErrors +TestIndentErrors/#00 +TestIndentErrors/#01 +TestEncoderSetEscapeHTML +TestEncoderSetEscapeHTML/tagStruct +TestEncoderSetEscapeHTML/stringOption +TestRawMessage +TestDecodeInStream +TestDecodeInStream/#14 +TestDecodeInStream/#15 +TestDecodeInStream/#16 +TestDecodeInStream/#17 +TestStructTagObjectKey +TestStructTagObjectKey/#07 +TestStructTagObjectKey/#11 diff --git a/v1/failing_test.go b/v1/failing_test.go new file mode 100644 index 0000000..287ac7c --- /dev/null +++ b/v1/failing_test.go @@ -0,0 +1,92 @@ +package json + +import ( + _ "embed" + "flag" + "fmt" + "os" + "os/exec" + "slices" + "strings" + "sync" + "testing" +) + +var skipKnownFailures = flag.Bool("skip-known-failures", true, "skip tests that are known to already be failing") +var updateKnownFailures = flag.Bool("update-known-failures", false, "update the list of known failures") + +//go:embed failing.txt +var knownFailuresText string +var knownFailures = sync.OnceValue(func() map[string]bool { + failures := make(map[string]bool) + for _, s := range strings.Split(knownFailuresText, "\n") { + failures[strings.TrimRight(s, "\r")] = true + } + return failures +}) + +// skipKnownFailure skips the current test if it is in the failing.old list. +func skipKnownFailure(t *testing.T) { + if *skipKnownFailures && knownFailures()[t.Name()] { + t.SkipNow() + } +} + +// TestKnownFailures tests whether the failing.old is up-to-date. +func TestKnownFailures(t *testing.T) { + if !*skipKnownFailures { + return // avoid infinite recursion calling the same test + } + + // Produce a sorted list of currently known failures. + b, _ := exec.Command("go", "test", "-skip-known-failures=false", ".").CombinedOutput() + var newFailing []string + for _, line := range strings.Split(string(b), "\n") { + if _, suffix, ok := strings.Cut(strings.TrimRight(line, "\r"), "--- FAIL: "); ok { + suffix = strings.TrimSuffix(suffix, ")") + suffix = strings.TrimRight(suffix, ".0123456789s") + suffix = strings.TrimSuffix(suffix, " (") + newFailing = append(newFailing, suffix) + } + } + newFailingSorted := slices.Clone(newFailing) + slices.Sort(newFailingSorted) + + // Produce a sorted list of previously known failures. + oldFailing := strings.Split(strings.TrimSuffix(knownFailuresText, "\n"), "\n") + for i, s := range oldFailing { + oldFailing[i] = strings.TrimRight(s, "\r") + } + oldFailingSorted := slices.Clone(oldFailing) + slices.Sort(oldFailingSorted) + + // Check whether the two lists match. + if !slices.Equal(newFailingSorted, oldFailingSorted) { + var diff []string + before, after := oldFailingSorted, newFailingSorted + for len(before)|len(after) > 0 { + switch { + case len(before) == 0: + diff = append(diff, fmt.Sprintf("+ %s\n", after[0])) + after = after[1:] + case len(after) == 0: + diff = append(diff, fmt.Sprintf("- %s\n", before[0])) + before = before[1:] + case after[0] < before[0]: + diff = append(diff, fmt.Sprintf("+ %s\n", after[0])) + after = after[1:] + case before[0] < after[0]: + diff = append(diff, fmt.Sprintf("- %s\n", before[0])) + before = before[1:] + default: + before, after = before[1:], after[1:] + } + } + t.Errorf("known failures mismatch (-old +new):\n%s", strings.Join(diff, "")) + if *updateKnownFailures { + if err := os.WriteFile("failing.txt", []byte(strings.Join(newFailing, "\n")+"\n"), 0664); err != nil { + t.Errorf("os.WriteFile error: %v", err) + } + } + } +} diff --git a/v1/fold.go b/v1/fold.go deleted file mode 100644 index c4c671b..0000000 --- a/v1/fold.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "unicode" - "unicode/utf8" -) - -// foldName returns a folded string such that foldName(x) == foldName(y) -// is identical to bytes.EqualFold(x, y). -func foldName(in []byte) []byte { - // This is inlinable to take advantage of "function outlining". - var arr [32]byte // large enough for most JSON names - return appendFoldedName(arr[:0], in) -} - -func appendFoldedName(out, in []byte) []byte { - for i := 0; i < len(in); { - // Handle single-byte ASCII. - if c := in[i]; c < utf8.RuneSelf { - if 'a' <= c && c <= 'z' { - c -= 'a' - 'A' - } - out = append(out, c) - i++ - continue - } - // Handle multi-byte Unicode. - r, n := utf8.DecodeRune(in[i:]) - out = utf8.AppendRune(out, foldRune(r)) - i += n - } - return out -} - -// foldRune is returns the smallest rune for all runes in the same fold set. -func foldRune(r rune) rune { - for { - r2 := unicode.SimpleFold(r) - if r2 <= r { - return r2 - } - r = r2 - } -} diff --git a/v1/fold_test.go b/v1/fold_test.go deleted file mode 100644 index 9d6fd05..0000000 --- a/v1/fold_test.go +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2013 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "bytes" - "testing" -) - -func FuzzEqualFold(f *testing.F) { - for _, ss := range [][2]string{ - {"", ""}, - {"123abc", "123ABC"}, - {"αβδ", "ΑΒΔ"}, - {"abc", "xyz"}, - {"abc", "XYZ"}, - {"1", "2"}, - {"hello, world!", "hello, world!"}, - {"hello, world!", "Hello, World!"}, - {"hello, world!", "HELLO, WORLD!"}, - {"hello, world!", "jello, world!"}, - {"γειά, κόσμε!", "γειά, κόσμε!"}, - {"γειά, κόσμε!", "Γειά, Κόσμε!"}, - {"γειά, κόσμε!", "ΓΕΙΆ, ΚΌΣΜΕ!"}, - {"γειά, κόσμε!", "ΛΕΙΆ, ΚΌΣΜΕ!"}, - {"AESKey", "aesKey"}, - {"AESKEY", "aes_key"}, - {"aes_key", "AES_KEY"}, - {"AES_KEY", "aes-key"}, - {"aes-key", "AES-KEY"}, - {"AES-KEY", "aesKey"}, - {"aesKey", "AesKey"}, - {"AesKey", "AESKey"}, - {"AESKey", "aeskey"}, - {"DESKey", "aeskey"}, - {"AES Key", "aeskey"}, - } { - f.Add([]byte(ss[0]), []byte(ss[1])) - } - equalFold := func(x, y []byte) bool { return string(foldName(x)) == string(foldName(y)) } - f.Fuzz(func(t *testing.T, x, y []byte) { - got := equalFold(x, y) - want := bytes.EqualFold(x, y) - if got != want { - t.Errorf("equalFold(%q, %q) = %v, want %v", x, y, got, want) - } - }) -} diff --git a/v1/indent.go b/v1/indent.go index 01bfdf6..90fe480 100644 --- a/v1/indent.go +++ b/v1/indent.go @@ -4,7 +4,12 @@ package json -import "bytes" +import ( + "bytes" + "strings" + + "github.com/go-json-experiment/json/jsontext" +) // HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029 // characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029 @@ -17,6 +22,7 @@ func HTMLEscape(dst *bytes.Buffer, src []byte) { } func appendHTMLEscape(dst, src []byte) []byte { + const hex = "0123456789abcdef" // The characters can only appear in string literals, // so just scan the string one byte at a time. start := 0 @@ -41,59 +47,12 @@ func appendHTMLEscape(dst, src []byte) []byte { func Compact(dst *bytes.Buffer, src []byte) error { dst.Grow(len(src)) b := dst.AvailableBuffer() - b, err := appendCompact(b, src, false) - dst.Write(b) - return err -} - -func appendCompact(dst, src []byte, escape bool) ([]byte, error) { - origLen := len(dst) - scan := newScanner() - defer freeScanner(scan) - start := 0 - for i, c := range src { - if escape && (c == '<' || c == '>' || c == '&') { - if start < i { - dst = append(dst, src[start:i]...) - } - dst = append(dst, '\\', 'u', '0', '0', hex[c>>4], hex[c&0xF]) - start = i + 1 - } - // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9). - if escape && c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 { - if start < i { - dst = append(dst, src[start:i]...) - } - dst = append(dst, '\\', 'u', '2', '0', '2', hex[src[i+2]&0xF]) - start = i + 3 - } - v := scan.step(scan, c) - if v >= scanSkipSpace { - if v == scanError { - break - } - if start < i { - dst = append(dst, src[start:i]...) - } - start = i + 1 - } - } - if scan.eof() == scanError { - return dst[:origLen], scan.err - } - if start < len(src) { - dst = append(dst, src[start:]...) + b = append(b, src...) + if err := (*jsontext.Value)(&b).Compact(); err != nil { + return transformSyntacticError(err) } - return dst, nil -} - -func appendNewline(dst []byte, prefix, indent string, depth int) []byte { - dst = append(dst, '\n') - dst = append(dst, prefix...) - for i := 0; i < depth; i++ { - dst = append(dst, indent...) - } - return dst + dst.Write(b) + return nil } // indentGrowthFactor specifies the growth factor of indenting JSON input. @@ -124,59 +83,40 @@ func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error { } func appendIndent(dst, src []byte, prefix, indent string) ([]byte, error) { - origLen := len(dst) - scan := newScanner() - defer freeScanner(scan) - needIndent := false - depth := 0 - for _, c := range src { - scan.bytes++ - v := scan.step(scan, c) - if v == scanSkipSpace { - continue - } - if v == scanError { - break - } - if needIndent && v != scanEndObject && v != scanEndArray { - needIndent = false - depth++ - dst = appendNewline(dst, prefix, indent, depth) - } - - // Emit semantically uninteresting bytes - // (in particular, punctuation in strings) unmodified. - if v == scanContinue { - dst = append(dst, c) - continue - } - - // Add spacing around real punctuation. - switch c { - case '{', '[': - // delay indent so that empty object and array are formatted as {} and []. - needIndent = true - dst = append(dst, c) - case ',': - dst = append(dst, c) - dst = appendNewline(dst, prefix, indent, depth) - case ':': - dst = append(dst, c, ' ') - case '}', ']': - if needIndent { - // suppress indent in empty object/array - needIndent = false - } else { - depth-- - dst = appendNewline(dst, prefix, indent, depth) + // In v2, trailing whitespace is discarded, while v1 preserved it. + dstLen := len(dst) + if n := len(src) - len(bytes.TrimRight(src, " \n\r\t")); n > 0 { + // Append the trailing whitespace afterwards. + defer func() { + if len(dst) > dstLen { + dst = append(dst, src[len(src)-n:]...) } - dst = append(dst, c) - default: - dst = append(dst, c) - } + }() + } + // In v2, only spaces and tabs are allowed, while v1 allowed any character. + if len(strings.Trim(prefix, " \t"))+len(strings.Trim(indent, " \t")) > 0 { + // Use placeholder spaces of correct length, and replace afterwards. + invalidPrefix, invalidIndent := prefix, indent + prefix = strings.Repeat(" ", len(prefix)) + indent = strings.Repeat(" ", len(indent)) + defer func() { + b := dst[dstLen:] + for i := bytes.IndexByte(b, '\n'); i >= 0; i = bytes.IndexByte(b, '\n') { + b = b[i+len("\n"):] + n := len(b) - len(bytes.TrimLeft(b, " ")) // len(prefix)+n*len(indent) + spaces := b[:n] + spaces = spaces[copy(spaces, invalidPrefix):] + for len(spaces) > 0 { + spaces = spaces[copy(spaces, invalidIndent):] + } + b = b[n:] + } + }() } - if scan.eof() == scanError { - return dst[:origLen], scan.err + + dst = append(dst, src...) + if err := (*jsontext.Value)(&dst).Indent(prefix, indent); err != nil { + return dst[:dstLen], transformSyntacticError(err) } return dst, nil } diff --git a/v1/number_test.go b/v1/number_test.go deleted file mode 100644 index c82e6de..0000000 --- a/v1/number_test.go +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import ( - "regexp" - "testing" -) - -func TestNumberIsValid(t *testing.T) { - // From: https://stackoverflow.com/a/13340826 - var jsonNumberRegexp = regexp.MustCompile(`^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?$`) - - validTests := []string{ - "0", - "-0", - "1", - "-1", - "0.1", - "-0.1", - "1234", - "-1234", - "12.34", - "-12.34", - "12E0", - "12E1", - "12e34", - "12E-0", - "12e+1", - "12e-34", - "-12E0", - "-12E1", - "-12e34", - "-12E-0", - "-12e+1", - "-12e-34", - "1.2E0", - "1.2E1", - "1.2e34", - "1.2E-0", - "1.2e+1", - "1.2e-34", - "-1.2E0", - "-1.2E1", - "-1.2e34", - "-1.2E-0", - "-1.2e+1", - "-1.2e-34", - "0E0", - "0E1", - "0e34", - "0E-0", - "0e+1", - "0e-34", - "-0E0", - "-0E1", - "-0e34", - "-0E-0", - "-0e+1", - "-0e-34", - } - - for _, test := range validTests { - if !isValidNumber(test) { - t.Errorf("%s should be valid", test) - } - - var f float64 - if err := Unmarshal([]byte(test), &f); err != nil { - t.Errorf("%s should be valid but Unmarshal failed: %v", test, err) - } - - if !jsonNumberRegexp.MatchString(test) { - t.Errorf("%s should be valid but regexp does not match", test) - } - } - - invalidTests := []string{ - "", - "invalid", - "1.0.1", - "1..1", - "-1-2", - "012a42", - "01.2", - "012", - "12E12.12", - "1e2e3", - "1e+-2", - "1e--23", - "1e", - "e1", - "1e+", - "1ea", - "1a", - "1.a", - "1.", - "01", - "1.e1", - } - - for _, test := range invalidTests { - if isValidNumber(test) { - t.Errorf("%s should be invalid", test) - } - - var f float64 - if err := Unmarshal([]byte(test), &f); err == nil { - t.Errorf("%s should be invalid but unmarshal wrote %v", test, f) - } - - if jsonNumberRegexp.MatchString(test) { - t.Errorf("%s should be invalid but matches regexp", test) - } - } -} diff --git a/v1/options.go b/v1/options.go index 57b8d09..26dde71 100644 --- a/v1/options.go +++ b/v1/options.go @@ -181,3 +181,13 @@ func UnmarshalArrayFromAnyLength(v bool) Options { return jsonflags.UnmarshalArrayFromAnyLength | 0 } } + +// unmarshalAnyWithRawNumber specifies that unmarshaling a JSON number into +// an empty Go interface should use the Number type instead of a float64. +func unmarshalAnyWithRawNumber(v bool) Options { + if v { + return jsonflags.UnmarshalAnyWithRawNumber | 1 + } else { + return jsonflags.UnmarshalAnyWithRawNumber | 0 + } +} diff --git a/v1/scanner.go b/v1/scanner.go index da6ea2a..1e97ea5 100644 --- a/v1/scanner.go +++ b/v1/scanner.go @@ -4,40 +4,32 @@ package json -// JSON value parser state machine. -// Just about at the limit of what is reasonable to write by hand. -// Some parts are a bit tedious, but overall it nicely factors out the -// otherwise common code from the multiple scanning functions -// in this package (Compact, Indent, checkValid, etc). -// -// This file starts with two simple examples using the scanner -// before diving into the scanner itself. - import ( - "strconv" - "sync" + "errors" + + "github.com/go-json-experiment/json/internal" + "github.com/go-json-experiment/json/internal/jsonflags" + "github.com/go-json-experiment/json/jsontext" ) +// export exposes internal functionality of the "jsontext" package. +var export = jsontext.Internal.Export(&internal.AllowInternalUse) + // Valid reports whether data is a valid JSON encoding. func Valid(data []byte) bool { - scan := newScanner() - defer freeScanner(scan) - return checkValid(data, scan) == nil + return checkValid(data) == nil } -// checkValid verifies that data is valid JSON-encoded data. -// scan is passed in for use by checkValid to avoid an allocation. -// checkValid returns nil or a SyntaxError. -func checkValid(data []byte, scan *scanner) error { - scan.reset() - for _, c := range data { - scan.bytes++ - if scan.step(scan, c) == scanError { - return scan.err - } +func checkValid(data []byte) error { + d := export.GetBufferedDecoder(data) + defer export.PutBufferedDecoder(d) + xd := export.Decoder(d) + xd.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) + if _, err := d.ReadValue(); err != nil { + return transformSyntacticError(err) } - if scan.eof() == scanError { - return scan.err + if err := xd.CheckEOF(); err != nil { + return transformSyntacticError(err) } return nil } @@ -51,560 +43,15 @@ type SyntaxError struct { func (e *SyntaxError) Error() string { return e.msg } -// A scanner is a JSON scanning state machine. -// Callers call scan.reset and then pass bytes in one at a time -// by calling scan.step(&scan, c) for each byte. -// The return value, referred to as an opcode, tells the -// caller about significant parsing events like beginning -// and ending literals, objects, and arrays, so that the -// caller can follow along if it wishes. -// The return value scanEnd indicates that a single top-level -// JSON value has been completed, *before* the byte that -// just got passed in. (The indication must be delayed in order -// to recognize the end of numbers: is 123 a whole value or -// the beginning of 12345e+6?). -type scanner struct { - // The step is a func to be called to execute the next transition. - // Also tried using an integer constant and a single func - // with a switch, but using the func directly was 10% faster - // on a 64-bit Mac Mini, and it's nicer to read. - step func(*scanner, byte) int - - // Reached end of top-level value. - endTop bool - - // Stack of what we're in the middle of - array values, object keys, object values. - parseState []int - - // Error that happened, if any. - err error - - // total bytes consumed, updated by decoder.Decode (and deliberately - // not set to zero by scan.reset) - bytes int64 -} - -var scannerPool = sync.Pool{ - New: func() any { - return &scanner{} - }, -} - -func newScanner() *scanner { - scan := scannerPool.Get().(*scanner) - // scan.reset by design doesn't set bytes to zero - scan.bytes = 0 - scan.reset() - return scan -} - -func freeScanner(scan *scanner) { - // Avoid hanging on to too much memory in extreme cases. - if len(scan.parseState) > 1024 { - scan.parseState = nil - } - scannerPool.Put(scan) -} - -// These values are returned by the state transition functions -// assigned to scanner.state and the method scanner.eof. -// They give details about the current state of the scan that -// callers might be interested to know about. -// It is okay to ignore the return value of any particular -// call to scanner.state: if one call returns scanError, -// every subsequent call will return scanError too. -const ( - // Continue. - scanContinue = iota // uninteresting byte - scanBeginLiteral // end implied by next result != scanContinue - scanBeginObject // begin object - scanObjectKey // just finished object key (string) - scanObjectValue // just finished non-last object value - scanEndObject // end object (implies scanObjectValue if possible) - scanBeginArray // begin array - scanArrayValue // just finished array value - scanEndArray // end array (implies scanArrayValue if possible) - scanSkipSpace // space byte; can skip; known to be last "continue" result - - // Stop. - scanEnd // top-level value ended *before* this byte; known to be first "stop" result - scanError // hit an error, scanner.err. -) - -// These values are stored in the parseState stack. -// They give the current state of a composite value -// being scanned. If the parser is inside a nested value -// the parseState describes the nested state, outermost at entry 0. -const ( - parseObjectKey = iota // parsing object key (before colon) - parseObjectValue // parsing object value (after colon) - parseArrayValue // parsing array value -) - -// This limits the max nesting depth to prevent stack overflow. -// This is permitted by https://tools.ietf.org/html/rfc7159#section-9 -const maxNestingDepth = 10000 - -// reset prepares the scanner for use. -// It must be called before calling s.step. -func (s *scanner) reset() { - s.step = stateBeginValue - s.parseState = s.parseState[0:0] - s.err = nil - s.endTop = false -} - -// eof tells the scanner that the end of input has been reached. -// It returns a scan status just as s.step does. -func (s *scanner) eof() int { - if s.err != nil { - return scanError - } - if s.endTop { - return scanEnd - } - s.step(s, ' ') - if s.endTop { - return scanEnd - } - if s.err == nil { - s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} - } - return scanError -} - -// pushParseState pushes a new parse state p onto the parse stack. -// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned. -func (s *scanner) pushParseState(c byte, newParseState int, successState int) int { - s.parseState = append(s.parseState, newParseState) - if len(s.parseState) <= maxNestingDepth { - return successState - } - return s.error(c, "exceeded max depth") -} - -// popParseState pops a parse state (already obtained) off the stack -// and updates s.step accordingly. -func (s *scanner) popParseState() { - n := len(s.parseState) - 1 - s.parseState = s.parseState[0:n] - if n == 0 { - s.step = stateEndTop - s.endTop = true - } else { - s.step = stateEndValue - } -} - -func isSpace(c byte) bool { - return c <= ' ' && (c == ' ' || c == '\t' || c == '\r' || c == '\n') -} - -// stateBeginValueOrEmpty is the state after reading `[`. -func stateBeginValueOrEmpty(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == ']' { - return stateEndValue(s, c) - } - return stateBeginValue(s, c) -} - -// stateBeginValue is the state at the beginning of the input. -func stateBeginValue(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - switch c { - case '{': - s.step = stateBeginStringOrEmpty - return s.pushParseState(c, parseObjectKey, scanBeginObject) - case '[': - s.step = stateBeginValueOrEmpty - return s.pushParseState(c, parseArrayValue, scanBeginArray) - case '"': - s.step = stateInString - return scanBeginLiteral - case '-': - s.step = stateNeg - return scanBeginLiteral - case '0': // beginning of 0.123 - s.step = state0 - return scanBeginLiteral - case 't': // beginning of true - s.step = stateT - return scanBeginLiteral - case 'f': // beginning of false - s.step = stateF - return scanBeginLiteral - case 'n': // beginning of null - s.step = stateN - return scanBeginLiteral - } - if '1' <= c && c <= '9' { // beginning of 1234.5 - s.step = state1 - return scanBeginLiteral - } - return s.error(c, "looking for beginning of value") -} - -// stateBeginStringOrEmpty is the state after reading `{`. -func stateBeginStringOrEmpty(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == '}' { - n := len(s.parseState) - s.parseState[n-1] = parseObjectValue - return stateEndValue(s, c) - } - return stateBeginString(s, c) -} - -// stateBeginString is the state after reading `{"key": value,`. -func stateBeginString(s *scanner, c byte) int { - if isSpace(c) { - return scanSkipSpace - } - if c == '"' { - s.step = stateInString - return scanBeginLiteral - } - return s.error(c, "looking for beginning of object key string") -} - -// stateEndValue is the state after completing a value, -// such as after reading `{}` or `true` or `["x"`. -func stateEndValue(s *scanner, c byte) int { - n := len(s.parseState) - if n == 0 { - // Completed top-level before the current byte. - s.step = stateEndTop - s.endTop = true - return stateEndTop(s, c) - } - if isSpace(c) { - s.step = stateEndValue - return scanSkipSpace - } - ps := s.parseState[n-1] - switch ps { - case parseObjectKey: - if c == ':' { - s.parseState[n-1] = parseObjectValue - s.step = stateBeginValue - return scanObjectKey - } - return s.error(c, "after object key") - case parseObjectValue: - if c == ',' { - s.parseState[n-1] = parseObjectKey - s.step = stateBeginString - return scanObjectValue - } - if c == '}' { - s.popParseState() - return scanEndObject - } - return s.error(c, "after object key:value pair") - case parseArrayValue: - if c == ',' { - s.step = stateBeginValue - return scanArrayValue - } - if c == ']' { - s.popParseState() - return scanEndArray - } - return s.error(c, "after array element") - } - return s.error(c, "") -} - -// stateEndTop is the state after finishing the top-level value, -// such as after reading `{}` or `[1,2,3]`. -// Only space characters should be seen now. -func stateEndTop(s *scanner, c byte) int { - if !isSpace(c) { - // Complain about non-space byte on next call. - s.error(c, "after top-level value") - } - return scanEnd -} - -// stateInString is the state after reading `"`. -func stateInString(s *scanner, c byte) int { - if c == '"' { - s.step = stateEndValue - return scanContinue - } - if c == '\\' { - s.step = stateInStringEsc - return scanContinue - } - if c < 0x20 { - return s.error(c, "in string literal") - } - return scanContinue -} - -// stateInStringEsc is the state after reading `"\` during a quoted string. -func stateInStringEsc(s *scanner, c byte) int { - switch c { - case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': - s.step = stateInString - return scanContinue - case 'u': - s.step = stateInStringEscU - return scanContinue - } - return s.error(c, "in string escape code") -} - -// stateInStringEscU is the state after reading `"\u` during a quoted string. -func stateInStringEscU(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU1 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. -func stateInStringEscU1(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU12 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. -func stateInStringEscU12(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInStringEscU123 - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. -func stateInStringEscU123(s *scanner, c byte) int { - if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - s.step = stateInString - return scanContinue - } - // numbers - return s.error(c, "in \\u hexadecimal character escape") -} - -// stateNeg is the state after reading `-` during a number. -func stateNeg(s *scanner, c byte) int { - if c == '0' { - s.step = state0 - return scanContinue - } - if '1' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return s.error(c, "in numeric literal") -} - -// state1 is the state after reading a non-zero integer during a number, -// such as after reading `1` or `100` but not `0`. -func state1(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = state1 - return scanContinue - } - return state0(s, c) -} - -// state0 is the state after reading `0` during a number. -func state0(s *scanner, c byte) int { - if c == '.' { - s.step = stateDot - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateDot is the state after reading the integer and decimal point in a number, -// such as after reading `1.`. -func stateDot(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = stateDot0 - return scanContinue - } - return s.error(c, "after decimal point in numeric literal") -} - -// stateDot0 is the state after reading the integer, decimal point, and subsequent -// digits of a number, such as after reading `3.14`. -func stateDot0(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - return scanContinue - } - if c == 'e' || c == 'E' { - s.step = stateE - return scanContinue - } - return stateEndValue(s, c) -} - -// stateE is the state after reading the mantissa and e in a number, -// such as after reading `314e` or `0.314e`. -func stateE(s *scanner, c byte) int { - if c == '+' || c == '-' { - s.step = stateESign - return scanContinue - } - return stateESign(s, c) -} - -// stateESign is the state after reading the mantissa, e, and sign in a number, -// such as after reading `314e-` or `0.314e+`. -func stateESign(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - s.step = stateE0 - return scanContinue - } - return s.error(c, "in exponent of numeric literal") -} - -// stateE0 is the state after reading the mantissa, e, optional sign, -// and at least one digit of the exponent in a number, -// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. -func stateE0(s *scanner, c byte) int { - if '0' <= c && c <= '9' { - return scanContinue - } - return stateEndValue(s, c) -} - -// stateT is the state after reading `t`. -func stateT(s *scanner, c byte) int { - if c == 'r' { - s.step = stateTr - return scanContinue +func transformSyntacticError(err error) error { + switch serr, ok := err.(*jsontext.SyntacticError); { + case serr != nil: + return &SyntaxError{Offset: serr.ByteOffset, msg: serr.Error()} + case ok: + return (*SyntaxError)(nil) + case export.IsIOError(err): + return errors.Unwrap(err) // v1 historically did not wrap IO errors + default: + return err } - return s.error(c, "in literal true (expecting 'r')") -} - -// stateTr is the state after reading `tr`. -func stateTr(s *scanner, c byte) int { - if c == 'u' { - s.step = stateTru - return scanContinue - } - return s.error(c, "in literal true (expecting 'u')") -} - -// stateTru is the state after reading `tru`. -func stateTru(s *scanner, c byte) int { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal true (expecting 'e')") -} - -// stateF is the state after reading `f`. -func stateF(s *scanner, c byte) int { - if c == 'a' { - s.step = stateFa - return scanContinue - } - return s.error(c, "in literal false (expecting 'a')") -} - -// stateFa is the state after reading `fa`. -func stateFa(s *scanner, c byte) int { - if c == 'l' { - s.step = stateFal - return scanContinue - } - return s.error(c, "in literal false (expecting 'l')") -} - -// stateFal is the state after reading `fal`. -func stateFal(s *scanner, c byte) int { - if c == 's' { - s.step = stateFals - return scanContinue - } - return s.error(c, "in literal false (expecting 's')") -} - -// stateFals is the state after reading `fals`. -func stateFals(s *scanner, c byte) int { - if c == 'e' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal false (expecting 'e')") -} - -// stateN is the state after reading `n`. -func stateN(s *scanner, c byte) int { - if c == 'u' { - s.step = stateNu - return scanContinue - } - return s.error(c, "in literal null (expecting 'u')") -} - -// stateNu is the state after reading `nu`. -func stateNu(s *scanner, c byte) int { - if c == 'l' { - s.step = stateNul - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateNul is the state after reading `nul`. -func stateNul(s *scanner, c byte) int { - if c == 'l' { - s.step = stateEndValue - return scanContinue - } - return s.error(c, "in literal null (expecting 'l')") -} - -// stateError is the state after reaching a syntax error, -// such as after reading `[1}` or `5.1.2`. -func stateError(s *scanner, c byte) int { - return scanError -} - -// error records an error and switches to the error state. -func (s *scanner) error(c byte, context string) int { - s.step = stateError - s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} - return scanError -} - -// quoteChar formats c as a quoted character literal. -func quoteChar(c byte) string { - // special cases - different from quoted strings - if c == '\'' { - return `'\''` - } - if c == '"' { - return `'"'` - } - - // use quoted string with different quotation marks - s := strconv.Quote(string(c)) - return "'" + s[1:len(s)-1] + "'" } diff --git a/v1/scanner_test.go b/v1/scanner_test.go index 068439d..2694b2c 100644 --- a/v1/scanner_test.go +++ b/v1/scanner_test.go @@ -195,6 +195,7 @@ func TestIndentErrors(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) slice := make([]uint8, 0) buf := bytes.NewBuffer(slice) if err := Indent(buf, []uint8(tt.in), "", ""); err != nil { diff --git a/v1/stream.go b/v1/stream.go index e2d9470..d7f06d1 100644 --- a/v1/stream.go +++ b/v1/stream.go @@ -6,22 +6,17 @@ package json import ( "bytes" - "errors" "io" + + jsonv2 "github.com/go-json-experiment/json" + "github.com/go-json-experiment/json/jsontext" ) // A Decoder reads and decodes JSON values from an input stream. type Decoder struct { - r io.Reader - buf []byte - d decodeState - scanp int // start of unread data in buf - scanned int64 // amount of data already scanned - scan scanner - err error - - tokenState int - tokenStack []int + dec *jsontext.Decoder + opts jsonv2.Options + err error } // NewDecoder returns a new decoder that reads from r. @@ -29,17 +24,35 @@ type Decoder struct { // The decoder introduces its own buffering and may // read data from r beyond the JSON values requested. func NewDecoder(r io.Reader) *Decoder { - return &Decoder{r: r} + // Hide bytes.Buffer from jsontext since it implements optimizations that + // also limits certain ways it could be used. For example, one cannot write + // to the bytes.Buffer while it is in use by jsontext.Decoder. + if _, ok := r.(*bytes.Buffer); ok { + r = struct{ io.Reader }{r} + } + + dec := new(Decoder) + dec.opts = DefaultOptionsV1() + dec.dec = jsontext.NewDecoder(r, dec.opts) + return dec } // UseNumber causes the Decoder to unmarshal a number into an // interface value as a [Number] instead of as a float64. -func (dec *Decoder) UseNumber() { dec.d.useNumber = true } +func (dec *Decoder) UseNumber() { + if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); !useNumber { + dec.opts = jsonv2.JoinOptions(dec.opts, unmarshalAnyWithRawNumber(true)) + } +} // DisallowUnknownFields causes the Decoder to return an error when the destination // is a struct and the input contains object keys which do not match any // non-ignored, exported fields in the destination. -func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true } +func (dec *Decoder) DisallowUnknownFields() { + if reject, _ := jsonv2.GetOption(dec.opts, jsonv2.RejectUnknownMembers); !reject { + dec.opts = jsonv2.JoinOptions(dec.opts, jsonv2.RejectUnknownMembers(true)) + } +} // Decode reads the next JSON-encoded value from its // input and stores it in the value pointed to by v. @@ -50,151 +63,43 @@ func (dec *Decoder) Decode(v any) error { if dec.err != nil { return dec.err } - - if err := dec.tokenPrepareForDecode(); err != nil { - return err - } - - if !dec.tokenValueAllowed() { - return &SyntaxError{msg: "not at beginning of value", Offset: dec.InputOffset()} - } - - // Read whole value into buffer. - n, err := dec.readValue() + data, err := dec.dec.ReadValue() if err != nil { + err = transformSyntacticError(err) + dec.err = err return err } - dec.d.init(dec.buf[dec.scanp : dec.scanp+n]) - dec.scanp += n - - // Don't save err from unmarshal into dec.err: - // the connection is still usable since we read a complete JSON - // object from it before the error happened. - err = dec.d.unmarshal(v) - - // fixup token streaming state - dec.tokenValueEnd() - - return err + return jsonv2.Unmarshal(data, v, dec.opts) } // Buffered returns a reader of the data remaining in the Decoder's // buffer. The reader is valid until the next call to [Decoder.Decode]. func (dec *Decoder) Buffered() io.Reader { - return bytes.NewReader(dec.buf[dec.scanp:]) -} - -// readValue reads a JSON value into dec.buf. -// It returns the length of the encoding. -func (dec *Decoder) readValue() (int, error) { - dec.scan.reset() - - scanp := dec.scanp - var err error -Input: - // help the compiler see that scanp is never negative, so it can remove - // some bounds checks below. - for scanp >= 0 { - - // Look in the buffer for a new value. - for ; scanp < len(dec.buf); scanp++ { - c := dec.buf[scanp] - dec.scan.bytes++ - switch dec.scan.step(&dec.scan, c) { - case scanEnd: - // scanEnd is delayed one byte so we decrement - // the scanner bytes count by 1 to ensure that - // this value is correct in the next call of Decode. - dec.scan.bytes-- - break Input - case scanEndObject, scanEndArray: - // scanEnd is delayed one byte. - // We might block trying to get that byte from src, - // so instead invent a space byte. - if stateEndValue(&dec.scan, ' ') == scanEnd { - scanp++ - break Input - } - case scanError: - dec.err = dec.scan.err - return 0, dec.scan.err - } - } - - // Did the last read have an error? - // Delayed until now to allow buffer scan. - if err != nil { - if err == io.EOF { - if dec.scan.step(&dec.scan, ' ') == scanEnd { - break Input - } - if nonSpace(dec.buf) { - err = io.ErrUnexpectedEOF - } - } - dec.err = err - return 0, err - } - - n := scanp - dec.scanp - err = dec.refill() - scanp = dec.scanp + n - } - return scanp - dec.scanp, nil -} - -func (dec *Decoder) refill() error { - // Make room to read more into the buffer. - // First slide down data already consumed. - if dec.scanp > 0 { - dec.scanned += int64(dec.scanp) - n := copy(dec.buf, dec.buf[dec.scanp:]) - dec.buf = dec.buf[:n] - dec.scanp = 0 - } - - // Grow buffer if not large enough. - const minRead = 512 - if cap(dec.buf)-len(dec.buf) < minRead { - newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead) - copy(newBuf, dec.buf) - dec.buf = newBuf - } - - // Read. Delay error for next iteration (after scan). - n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)]) - dec.buf = dec.buf[0 : len(dec.buf)+n] - - return err -} - -func nonSpace(b []byte) bool { - for _, c := range b { - if !isSpace(c) { - return true - } - } - return false + return bytes.NewReader(dec.dec.UnreadBuffer()) } // An Encoder writes JSON values to an output stream. type Encoder struct { - w io.Writer - err error - escapeHTML bool + w io.Writer + opts jsonv2.Options + err error + + buf bytes.Buffer + indentBuf bytes.Buffer - indentBuf []byte indentPrefix string indentValue string } // NewEncoder returns a new encoder that writes to w. func NewEncoder(w io.Writer) *Encoder { - return &Encoder{w: w, escapeHTML: true} + enc := new(Encoder) + enc.w = w + enc.opts = DefaultOptionsV1() + return enc } // Encode writes the JSON encoding of v to the stream, -// with insignificant space characters elided, // followed by a newline character. // // See the documentation for [Marshal] for details about the @@ -204,34 +109,25 @@ func (enc *Encoder) Encode(v any) error { return enc.err } - e := newEncodeState() - defer encodeStatePool.Put(e) - - err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}) - if err != nil { + buf := &enc.buf + buf.Reset() + if err := jsonv2.MarshalWrite(buf, v, enc.opts); err != nil { return err } - - // Terminate each value with a newline. - // This makes the output look a little nicer - // when debugging, and some kind of space - // is required if the encoded value was a number, - // so that the reader knows there aren't more - // digits coming. - e.WriteByte('\n') - - b := e.Bytes() - if enc.indentPrefix != "" || enc.indentValue != "" { - enc.indentBuf, err = appendIndent(enc.indentBuf[:0], b, enc.indentPrefix, enc.indentValue) - if err != nil { + if len(enc.indentPrefix)+len(enc.indentValue) > 0 { + enc.indentBuf.Reset() + if err := Indent(&enc.indentBuf, buf.Bytes(), enc.indentPrefix, enc.indentValue); err != nil { return err } - b = enc.indentBuf + buf = &enc.indentBuf } - if _, err = enc.w.Write(b); err != nil { + buf.WriteByte('\n') + + if _, err := enc.w.Write(buf.Bytes()); err != nil { enc.err = err + return err } - return err + return nil } // SetIndent instructs the encoder to format each subsequent encoded @@ -250,33 +146,15 @@ func (enc *Encoder) SetIndent(prefix, indent string) { // In non-HTML settings where the escaping interferes with the readability // of the output, SetEscapeHTML(false) disables this behavior. func (enc *Encoder) SetEscapeHTML(on bool) { - enc.escapeHTML = on + if escape, _ := jsonv2.GetOption(enc.opts, jsontext.EscapeForHTML); escape != on { + enc.opts = jsonv2.JoinOptions(enc.opts, jsontext.EscapeForHTML(on)) + } } // RawMessage is a raw encoded JSON value. // It implements [Marshaler] and [Unmarshaler] and can // be used to delay JSON decoding or precompute a JSON encoding. -type RawMessage []byte - -// MarshalJSON returns m as the JSON encoding of m. -func (m RawMessage) MarshalJSON() ([]byte, error) { - if m == nil { - return []byte("null"), nil - } - return m, nil -} - -// UnmarshalJSON sets *m to a copy of data. -func (m *RawMessage) UnmarshalJSON(data []byte) error { - if m == nil { - return errors.New("json.RawMessage: UnmarshalJSON on nil pointer") - } - *m = append((*m)[0:0], data...) - return nil -} - -var _ Marshaler = (*RawMessage)(nil) -var _ Unmarshaler = (*RawMessage)(nil) +type RawMessage = jsontext.Value // A Token holds a value of one of these types: // @@ -288,65 +166,6 @@ var _ Unmarshaler = (*RawMessage)(nil) // - nil, for JSON null type Token any -const ( - tokenTopValue = iota - tokenArrayStart - tokenArrayValue - tokenArrayComma - tokenObjectStart - tokenObjectKey - tokenObjectColon - tokenObjectValue - tokenObjectComma -) - -// advance tokenstate from a separator state to a value state -func (dec *Decoder) tokenPrepareForDecode() error { - // Note: Not calling peek before switch, to avoid - // putting peek into the standard Decode path. - // peek is only called when using the Token API. - switch dec.tokenState { - case tokenArrayComma: - c, err := dec.peek() - if err != nil { - return err - } - if c != ',' { - return &SyntaxError{"expected comma after array element", dec.InputOffset()} - } - dec.scanp++ - dec.tokenState = tokenArrayValue - case tokenObjectColon: - c, err := dec.peek() - if err != nil { - return err - } - if c != ':' { - return &SyntaxError{"expected colon after object key", dec.InputOffset()} - } - dec.scanp++ - dec.tokenState = tokenObjectValue - } - return nil -} - -func (dec *Decoder) tokenValueAllowed() bool { - switch dec.tokenState { - case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue: - return true - } - return false -} - -func (dec *Decoder) tokenValueEnd() { - switch dec.tokenState { - case tokenArrayStart, tokenArrayValue: - dec.tokenState = tokenArrayComma - case tokenObjectValue: - dec.tokenState = tokenObjectComma - } -} - // A Delim is a JSON array or object delimiter, one of [ ] { or }. type Delim rune @@ -366,147 +185,47 @@ func (d Delim) String() string { // to mark the start and end of arrays and objects. // Commas and colons are elided. func (dec *Decoder) Token() (Token, error) { - for { - c, err := dec.peek() - if err != nil { - return nil, err - } - switch c { - case '[': - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenStack = append(dec.tokenStack, dec.tokenState) - dec.tokenState = tokenArrayStart - return Delim('['), nil - - case ']': - if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] - dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] - dec.tokenValueEnd() - return Delim(']'), nil - - case '{': - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenStack = append(dec.tokenStack, dec.tokenState) - dec.tokenState = tokenObjectStart - return Delim('{'), nil - - case '}': - if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1] - dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1] - dec.tokenValueEnd() - return Delim('}'), nil - - case ':': - if dec.tokenState != tokenObjectColon { - return dec.tokenError(c) - } - dec.scanp++ - dec.tokenState = tokenObjectValue - continue - - case ',': - if dec.tokenState == tokenArrayComma { - dec.scanp++ - dec.tokenState = tokenArrayValue - continue - } - if dec.tokenState == tokenObjectComma { - dec.scanp++ - dec.tokenState = tokenObjectKey - continue - } - return dec.tokenError(c) - - case '"': - if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey { - var x string - old := dec.tokenState - dec.tokenState = tokenTopValue - err := dec.Decode(&x) - dec.tokenState = old - if err != nil { - return nil, err - } - dec.tokenState = tokenObjectColon - return x, nil - } - fallthrough - - default: - if !dec.tokenValueAllowed() { - return dec.tokenError(c) - } - var x any - if err := dec.Decode(&x); err != nil { - return nil, err - } - return x, nil + tok, err := dec.dec.ReadToken() + if err != nil { + return nil, transformSyntacticError(err) + } + switch tok.Kind() { + case 'n': + return nil, nil + case 'f': + return false, nil + case 't': + return true, nil + case '"': + return tok.String(), nil + case '0': + if useNumber, _ := jsonv2.GetOption(dec.opts, unmarshalAnyWithRawNumber); useNumber { + return Number(tok.String()), nil } + return tok.Float(), nil + case '{': + return Delim('{'), nil + case '}': + return Delim('}'), nil + case '[': + return Delim('['), nil + case ']': + return Delim(']'), nil + default: + panic("unreachable") } } -func (dec *Decoder) tokenError(c byte) (Token, error) { - var context string - switch dec.tokenState { - case tokenTopValue: - context = " looking for beginning of value" - case tokenArrayStart, tokenArrayValue, tokenObjectValue: - context = " looking for beginning of value" - case tokenArrayComma: - context = " after array element" - case tokenObjectKey: - context = " looking for beginning of object key string" - case tokenObjectColon: - context = " after object key" - case tokenObjectComma: - context = " after object key:value pair" - } - return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} -} - // More reports whether there is another element in the // current array or object being parsed. func (dec *Decoder) More() bool { - c, err := dec.peek() - return err == nil && c != ']' && c != '}' -} - -func (dec *Decoder) peek() (byte, error) { - var err error - for { - for i := dec.scanp; i < len(dec.buf); i++ { - c := dec.buf[i] - if isSpace(c) { - continue - } - dec.scanp = i - return c, nil - } - // buffer has been scanned, now report any error - if err != nil { - return 0, err - } - err = dec.refill() - } + k := dec.dec.PeekKind() + return k > 0 && k != ']' && k != '}' } // InputOffset returns the input stream byte offset of the current decoder position. // The offset gives the location of the end of the most recently returned token // and the beginning of the next token. func (dec *Decoder) InputOffset() int64 { - return dec.scanned + int64(dec.scanp) + return dec.dec.InputOffset() } diff --git a/v1/stream_test.go b/v1/stream_test.go index 32ede8c..53e3a99 100644 --- a/v1/stream_test.go +++ b/v1/stream_test.go @@ -6,43 +6,23 @@ package json import ( "bytes" - "fmt" "io" "log" "net" "net/http" "net/http/httptest" - "path" "reflect" - "runtime" "runtime/debug" "strings" "testing" -) - -// TODO(https://go.dev/issue/52751): Replace with native testing support. - -// CaseName is a case name annotated with a file and line. -type CaseName struct { - Name string - Where CasePos -} -// Name annotates a case name with the file and line of the caller. -func Name(s string) (c CaseName) { - c.Name = s - runtime.Callers(2, c.Where.pc[:]) - return c -} + "github.com/go-json-experiment/json/internal/jsontest" +) -// CasePos represents a file and line number. -type CasePos struct{ pc [1]uintptr } +type CaseName = jsontest.CaseName +type CasePos = jsontest.CasePos -func (pos CasePos) String() string { - frames := runtime.CallersFrames(pos.pc[:]) - frame, _ := frames.Next() - return fmt.Sprintf("%s:%d", path.Base(frame.File), frame.Line) -} +var Name = jsontest.Name // Test values for the stream test. // One of each JSON kind. @@ -79,9 +59,9 @@ func TestEncoder(t *testing.T) { t.Fatalf("#%d.%d Encode error: %v", i, j, err) } } - if have, want := buf.String(), nlines(streamEncoded, i); have != want { + if got, want := buf.String(), nlines(streamEncoded, i); got != want { t.Errorf("encoding %d items: mismatch:", i) - diff(t, []byte(have), []byte(want)) + diff(t, []byte(got), []byte(want)) break } } @@ -148,9 +128,9 @@ func TestEncoderIndent(t *testing.T) { for _, v := range streamTest { enc.Encode(v) } - if have, want := buf.String(), streamEncodedIndent; have != want { - t.Error("Encode mismatch:") - diff(t, []byte(have), []byte(want)) + if got, want := buf.String(), streamEncodedIndent; got != want { + t.Errorf("Encode mismatch:\ngot:\n%s\n\nwant:\n%s", got, want) + diff(t, []byte(got), []byte(want)) } } @@ -214,6 +194,7 @@ func TestEncoderSetEscapeHTML(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) var buf strings.Builder enc := NewEncoder(&buf) if err := enc.Encode(tt.v); err != nil { @@ -304,6 +285,7 @@ func nlines(s string, n int) string { } func TestRawMessage(t *testing.T) { + skipKnownFailure(t) var data struct { X float64 Id RawMessage @@ -460,6 +442,7 @@ func TestDecodeInStream(t *testing.T) { } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { + skipKnownFailure(t) dec := NewDecoder(strings.NewReader(tt.json)) for i, want := range tt.expTokens { var got any diff --git a/v1/tables.go b/v1/tables.go deleted file mode 100644 index 10acdc1..0000000 --- a/v1/tables.go +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package json - -import "unicode/utf8" - -// safeSet holds the value true if the ASCII character with the given array -// position can be represented inside a JSON string without any further -// escaping. -// -// All values are true except for the ASCII control characters (0-31), the -// double quote ("), and the backslash character ("\"). -var safeSet = [utf8.RuneSelf]bool{ - ' ': true, - '!': true, - '"': false, - '#': true, - '$': true, - '%': true, - '&': true, - '\'': true, - '(': true, - ')': true, - '*': true, - '+': true, - ',': true, - '-': true, - '.': true, - '/': true, - '0': true, - '1': true, - '2': true, - '3': true, - '4': true, - '5': true, - '6': true, - '7': true, - '8': true, - '9': true, - ':': true, - ';': true, - '<': true, - '=': true, - '>': true, - '?': true, - '@': true, - 'A': true, - 'B': true, - 'C': true, - 'D': true, - 'E': true, - 'F': true, - 'G': true, - 'H': true, - 'I': true, - 'J': true, - 'K': true, - 'L': true, - 'M': true, - 'N': true, - 'O': true, - 'P': true, - 'Q': true, - 'R': true, - 'S': true, - 'T': true, - 'U': true, - 'V': true, - 'W': true, - 'X': true, - 'Y': true, - 'Z': true, - '[': true, - '\\': false, - ']': true, - '^': true, - '_': true, - '`': true, - 'a': true, - 'b': true, - 'c': true, - 'd': true, - 'e': true, - 'f': true, - 'g': true, - 'h': true, - 'i': true, - 'j': true, - 'k': true, - 'l': true, - 'm': true, - 'n': true, - 'o': true, - 'p': true, - 'q': true, - 'r': true, - 's': true, - 't': true, - 'u': true, - 'v': true, - 'w': true, - 'x': true, - 'y': true, - 'z': true, - '{': true, - '|': true, - '}': true, - '~': true, - '\u007f': true, -} - -// htmlSafeSet holds the value true if the ASCII character with the given -// array position can be safely represented inside a JSON string, embedded -// inside of HTML