From 2cde0eaeeab6bb47e61f441372a4dd4c5092cd06 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 6 Apr 2019 05:25:23 -0700 Subject: [PATCH] json module faster implementation (#173) * json module faster implementation * add some decoding error test --- docs/stdlib-json.md | 19 +- stdlib/json.go | 103 ++++++-- stdlib/json/decode.go | 374 ++++++++++++++++++++++++++ stdlib/json/encode.go | 147 ++++++++++ stdlib/json/json_test.go | 109 ++++++++ stdlib/json/scanner.go | 559 +++++++++++++++++++++++++++++++++++++++ stdlib/json_test.go | 64 +++-- 7 files changed, 1324 insertions(+), 51 deletions(-) create mode 100644 stdlib/json/decode.go create mode 100644 stdlib/json/encode.go create mode 100644 stdlib/json/json_test.go create mode 100644 stdlib/json/scanner.go diff --git a/docs/stdlib-json.md b/docs/stdlib-json.md index f0633801..fd87bf19 100644 --- a/docs/stdlib-json.md +++ b/docs/stdlib-json.md @@ -6,5 +6,20 @@ json := import("json") ## Functions -- `parse(v)`: Parses the JSON string and returns an object. -- `stringify(v)`: Returns the JSON string representation of the object. +- `decode(b string/bytes) => object`: Parses the JSON string and returns an object. +- `encode(o object) => bytes`: Returns the JSON string (bytes) of the object. Unlike Go's JSON package, this function does not HTML-escape texts, but one can use the `html_escape` function if needed. +- `indent(b string/bytes, prefix string, indent string) => bytes`: Returns an indented form of the input JSON bytes string; `prefix` and `indent` are passed through to Go's `json.Indent`. +- `html_escape(b string/bytes) => bytes`: Returns an HTML-safe form of the input JSON bytes string. 
+ + +## Examples + +```golang +json := import("json") + +encoded := json.encode({a: 1, b: [2, 3, 4]}) // JSON-encoded bytes string +indented := json.indent(encoded, "", "  ") // indented form +html_safe := json.html_escape(encoded) // HTML escaped form + +decoded := json.decode(encoded) // {a: 1, b: [2, 3, 4]} +``` diff --git a/stdlib/json.go b/stdlib/json.go index 5650027f..f913dc48 100644 --- a/stdlib/json.go +++ b/stdlib/json.go @@ -1,35 +1,38 @@ package stdlib import ( - "encoding/json" + "bytes" + gojson "encoding/json" - "github.com/d5/tengo" "github.com/d5/tengo/objects" + "github.com/d5/tengo/stdlib/json" ) var jsonModule = map[string]objects.Object{ - "parse": &objects.UserFunction{Name: "parse", Value: jsonParse}, - "stringify": &objects.UserFunction{Name: "stringify", Value: jsonStringify}, + "decode": &objects.UserFunction{Name: "decode", Value: jsonDecode}, + "encode": &objects.UserFunction{Name: "encode", Value: jsonEncode}, + "indent": &objects.UserFunction{Name: "encode", Value: jsonIndent}, + "html_escape": &objects.UserFunction{Name: "html_escape", Value: jsonHTMLEscape}, } -func jsonParse(args ...objects.Object) (ret objects.Object, err error) { +func jsonDecode(args ...objects.Object) (ret objects.Object, err error) { if len(args) != 1 { return nil, objects.ErrWrongNumArguments } - var target interface{} - switch o := args[0].(type) { case *objects.Bytes: - err := json.Unmarshal(o.Value, &target) + v, err := json.Decode(o.Value) if err != nil { return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil } + return v, nil case *objects.String: - err := json.Unmarshal([]byte(o.Value), &target) + v, err := json.Decode([]byte(o.Value)) if err != nil { return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil } + return v, nil default: return nil, objects.ErrInvalidArgumentType{ Name: "first", @@ -37,33 +40,87 @@ func jsonParse(args ...objects.Object) (ret objects.Object, err error) { Found: args[0].TypeName(), } } +} + +func
jsonEncode(args ...objects.Object) (ret objects.Object, err error) { + if len(args) != 1 { + return nil, objects.ErrWrongNumArguments + } - res, err := objects.FromInterface(target) + b, err := json.Encode(args[0]) if err != nil { - return nil, err + return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil } - return res, nil + return &objects.Bytes{Value: b}, nil } -func jsonStringify(args ...objects.Object) (ret objects.Object, err error) { - if len(args) != 1 { +func jsonIndent(args ...objects.Object) (ret objects.Object, err error) { + if len(args) != 3 { return nil, objects.ErrWrongNumArguments } - v := objects.ToInterface(args[0]) - if vErr, isErr := v.(error); isErr { - v = vErr.Error() + prefix, ok := objects.ToString(args[1]) + if !ok { + return nil, objects.ErrInvalidArgumentType{ + Name: "prefix", + Expected: "string(compatible)", + Found: args[1].TypeName(), + } } - res, err := json.Marshal(v) - if err != nil { - return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil + indent, ok := objects.ToString(args[2]) + if !ok { + return nil, objects.ErrInvalidArgumentType{ + Name: "indent", + Expected: "string(compatible)", + Found: args[2].TypeName(), + } + } + + switch o := args[0].(type) { + case *objects.Bytes: + var dst bytes.Buffer + err := gojson.Indent(&dst, o.Value, prefix, indent) + if err != nil { + return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil + } + return &objects.Bytes{Value: dst.Bytes()}, nil + case *objects.String: + var dst bytes.Buffer + err := gojson.Indent(&dst, []byte(o.Value), prefix, indent) + if err != nil { + return &objects.Error{Value: &objects.String{Value: err.Error()}}, nil + } + return &objects.Bytes{Value: dst.Bytes()}, nil + default: + return nil, objects.ErrInvalidArgumentType{ + Name: "first", + Expected: "bytes/string", + Found: args[0].TypeName(), + } } +} - if len(res) > tengo.MaxBytesLen { - return nil, objects.ErrBytesLimit +func jsonHTMLEscape(args ...objects.Object) 
(ret objects.Object, err error) { + if len(args) != 1 { + return nil, objects.ErrWrongNumArguments } - return &objects.String{Value: string(res)}, nil + switch o := args[0].(type) { + case *objects.Bytes: + var dst bytes.Buffer + gojson.HTMLEscape(&dst, o.Value) + return &objects.Bytes{Value: dst.Bytes()}, nil + case *objects.String: + var dst bytes.Buffer + gojson.HTMLEscape(&dst, []byte(o.Value)) + return &objects.Bytes{Value: dst.Bytes()}, nil + default: + return nil, objects.ErrInvalidArgumentType{ + Name: "first", + Expected: "bytes/string", + Found: args[0].TypeName(), + } + } } diff --git a/stdlib/json/decode.go b/stdlib/json/decode.go new file mode 100644 index 00000000..5a3fe6c7 --- /dev/null +++ b/stdlib/json/decode.go @@ -0,0 +1,374 @@ +// A modified version of Go's JSON implementation. + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "strconv" + "unicode" + "unicode/utf16" + "unicode/utf8" + + "github.com/d5/tengo/objects" +) + +// Decode parses the JSON-encoded data and returns the result object. +func Decode(data []byte) (objects.Object, error) { + var d decodeState + err := checkValid(data, &d.scan) + if err != nil { + return nil, err + } + + d.init(data) + d.scan.reset() + d.scanWhile(scanSkipSpace) + + return d.value() +} + +// decodeState represents the state while decoding a JSON value. +type decodeState struct { + data []byte + off int // next read offset in data + opcode int // last read result + scan scanner +} + +// readIndex returns the position of the last byte read. +func (d *decodeState) readIndex() int { + return d.off - 1 +} + +const phasePanicMsg = "JSON decoder out of sync - data changing underfoot?" + +func (d *decodeState) init(data []byte) *decodeState { + d.data = data + d.off = 0 + return d +} + +// scanNext processes the byte at d.data[d.off]. 
+func (d *decodeState) scanNext() {
+	if d.off < len(d.data) {
+		d.opcode = d.scan.step(&d.scan, d.data[d.off])
+		d.off++
+	} else {
+		d.opcode = d.scan.eof()
+		d.off = len(d.data) + 1 // mark processed EOF with len+1
+	}
+}
+
+// scanWhile processes bytes in d.data[d.off:] until it
+// receives a scan code not equal to op.
+// On return d.opcode holds that differing code and d.off points just past
+// the byte that produced it (or past EOF if the input ran out first).
+func (d *decodeState) scanWhile(op int) {
+	s, data, i := &d.scan, d.data, d.off
+	for i < len(data) {
+		newOp := s.step(s, data[i])
+		i++
+		if newOp != op {
+			d.opcode = newOp
+			d.off = i
+			return
+		}
+	}
+
+	d.off = len(data) + 1 // mark processed EOF with len+1
+	d.opcode = d.scan.eof()
+}
+
+// value decodes the JSON value whose first byte has just been scanned;
+// d.opcode must be the opcode produced by that byte. Arrays and objects are
+// followed by one extra scanNext so that, as with literals, d.opcode ends up
+// describing the byte after the value. It panics with phasePanicMsg if the
+// opcode does not begin a value.
+func (d *decodeState) value() (objects.Object, error) {
+	switch d.opcode {
+	default:
+		panic(phasePanicMsg)
+
+	case scanBeginArray:
+		o, err := d.array()
+		if err != nil {
+			return nil, err
+		}
+
+		d.scanNext()
+
+		return o, nil
+
+	case scanBeginObject:
+		o, err := d.object()
+		if err != nil {
+			return nil, err
+		}
+
+		d.scanNext()
+
+		return o, nil
+
+	case scanBeginLiteral:
+		return d.literal()
+	}
+}
+
+// array decodes the elements of a JSON array into an *objects.Array.
+// The opening '[' has already been scanned; on return the closing ']' has
+// been scanned as well (d.opcode == scanEndArray).
+func (d *decodeState) array() (objects.Object, error) {
+	var arr []objects.Object
+	for {
+		// Look ahead for ] - can only happen on first iteration.
+		d.scanWhile(scanSkipSpace)
+		if d.opcode == scanEndArray {
+			break
+		}
+
+		o, err := d.value()
+		if err != nil {
+			return nil, err
+		}
+		arr = append(arr, o)
+
+		// Next token must be , or ].
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		if d.opcode == scanEndArray {
+			break
+		}
+		if d.opcode != scanArrayValue {
+			panic(phasePanicMsg)
+		}
+	}
+
+	return &objects.Array{Value: arr}, nil
+}
+
+// object decodes the members of a JSON object into an *objects.Map keyed by
+// the unquoted member names. The opening '{' has already been scanned; on
+// return the closing '}' has been scanned as well (d.opcode == scanEndObject).
+// A repeated key overwrites the earlier entry.
+func (d *decodeState) object() (objects.Object, error) {
+	m := make(map[string]objects.Object)
+	for {
+		// Read opening " of string key or closing }.
+		d.scanWhile(scanSkipSpace)
+		if d.opcode == scanEndObject {
+			// closing } - can only happen on first iteration.
+			break
+		}
+		if d.opcode != scanBeginLiteral {
+			panic(phasePanicMsg)
+		}
+
+		// Read string key.
+		start := d.readIndex()
+		d.scanWhile(scanContinue)
+		item := d.data[start:d.readIndex()]
+		key, ok := unquote(item)
+		if !ok {
+			panic(phasePanicMsg)
+		}
+
+		// Read : before value.
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		if d.opcode != scanObjectKey {
+			panic(phasePanicMsg)
+		}
+		d.scanWhile(scanSkipSpace)
+
+		// Read value.
+		o, err := d.value()
+		if err != nil {
+			return nil, err
+		}
+
+		m[key] = o
+
+		// Next token must be , or }.
+		if d.opcode == scanSkipSpace {
+			d.scanWhile(scanSkipSpace)
+		}
+		if d.opcode == scanEndObject {
+			break
+		}
+		if d.opcode != scanObjectValue {
+			panic(phasePanicMsg)
+		}
+	}
+
+	return &objects.Map{Value: m}, nil
+}
+
+// literal decodes the literal that starts at the byte just scanned:
+// null becomes objects.UndefinedValue, true/false become objects.TrueValue /
+// objects.FalseValue, strings become *objects.String and every number becomes
+// an *objects.Float. It returns an error when a number cannot be converted by
+// strconv.ParseFloat.
+func (d *decodeState) literal() (objects.Object, error) {
+	// All bytes inside literal return scanContinue op code.
+	start := d.readIndex()
+	d.scanWhile(scanContinue)
+
+	item := d.data[start:d.readIndex()]
+
+	switch c := item[0]; c {
+	case 'n': // null
+		return objects.UndefinedValue, nil
+
+	case 't', 'f': // true, false
+		if c == 't' {
+			return objects.TrueValue, nil
+		}
+		return objects.FalseValue, nil
+
+	case '"': // string
+		s, ok := unquote(item)
+		if !ok {
+			panic(phasePanicMsg)
+		}
+		return &objects.String{Value: s}, nil
+
+	default: // number
+		if c != '-' && (c < '0' || c > '9') {
+			panic(phasePanicMsg)
+		}
+
+		// bitSize must be 32 or 64; the previous value of 10 only worked
+		// because ParseFloat treats anything other than 32 as 64.
+		// Report the error rather than dropping it: a number outside the
+		// float64 range would otherwise decode silently to +/-Inf.
+		n, err := strconv.ParseFloat(string(item), 64)
+		if err != nil {
+			return nil, err
+		}
+		return &objects.Float{Value: n}, nil
+	}
+}
+
+// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
+// or it returns -1.
+func getu4(s []byte) rune {
+	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
+		return -1
+	}
+	var r rune
+	for _, c := range s[2:6] {
+		switch {
+		case '0' <= c && c <= '9':
+			c = c - '0'
+		case 'a' <= c && c <= 'f':
+			c = c - 'a' + 10
+		case 'A' <= c && c <= 'F':
+			c = c - 'A' + 10
+		default:
+			return -1
+		}
+		r = r*16 + rune(c)
+	}
+	return r
+}
+
+// unquote converts a quoted JSON string literal s into an actual string t.
+// The rules are different than for Go, so cannot use strconv.Unquote. +func unquote(s []byte) (t string, ok bool) { + s, ok = unquoteBytes(s) + t = string(s) + return +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. 
+ case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +} diff --git a/stdlib/json/encode.go b/stdlib/json/encode.go new file mode 100644 index 00000000..2b8b17eb --- /dev/null +++ b/stdlib/json/encode.go @@ -0,0 +1,147 @@ +// A modified version of Go's JSON implementation. + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import ( + "encoding/base64" + "errors" + "math" + "strconv" + + "github.com/d5/tengo/objects" +) + +// Encode returns the JSON encoding of the object. +func Encode(o objects.Object) ([]byte, error) { + var b []byte + + switch o := o.(type) { + case *objects.Array: + b = append(b, '[') + len1 := len(o.Value) - 1 + for idx, elem := range o.Value { + eb, err := Encode(elem) + if err != nil { + return nil, err + } + b = append(b, eb...) + if idx < len1 { + b = append(b, ',') + } + } + b = append(b, ']') + case *objects.ImmutableArray: + b = append(b, '[') + len1 := len(o.Value) - 1 + for idx, elem := range o.Value { + eb, err := Encode(elem) + if err != nil { + return nil, err + } + b = append(b, eb...) + if idx < len1 { + b = append(b, ',') + } + } + b = append(b, ']') + case *objects.Map: + b = append(b, '{') + len1 := len(o.Value) - 1 + idx := 0 + for key, value := range o.Value { + b = strconv.AppendQuote(b, key) + b = append(b, ':') + eb, err := Encode(value) + if err != nil { + return nil, err + } + b = append(b, eb...) 
+ if idx < len1 { + b = append(b, ',') + } + idx++ + } + b = append(b, '}') + case *objects.ImmutableMap: + b = append(b, '{') + len1 := len(o.Value) - 1 + idx := 0 + for key, value := range o.Value { + b = strconv.AppendQuote(b, key) + b = append(b, ':') + eb, err := Encode(value) + if err != nil { + return nil, err + } + b = append(b, eb...) + if idx < len1 { + b = append(b, ',') + } + idx++ + } + b = append(b, '}') + case *objects.Bool: + if o.IsFalsy() { + b = strconv.AppendBool(b, false) + } else { + b = strconv.AppendBool(b, true) + } + case *objects.Bytes: + b = append(b, '"') + encodedLen := base64.StdEncoding.EncodedLen(len(o.Value)) + dst := make([]byte, encodedLen) + base64.StdEncoding.Encode(dst, o.Value) + b = append(b, dst...) + b = append(b, '"') + case *objects.Char: + b = strconv.AppendInt(b, int64(o.Value), 10) + case *objects.Float: + var y []byte + + f := o.Value + if math.IsInf(f, 0) || math.IsNaN(f) { + return nil, errors.New("unsupported float value") + } + + // Convert as if by ES6 number to string conversion. + // This matches most other JSON generators. + abs := math.Abs(f) + fmt := byte('f') + if abs != 0 { + if abs < 1e-6 || abs >= 1e21 { + fmt = 'e' + } + } + y = strconv.AppendFloat(y, f, fmt, -1, 64) + if fmt == 'e' { + // clean up e-09 to e-9 + n := len(y) + if n >= 4 && y[n-4] == 'e' && y[n-3] == '-' && y[n-2] == '0' { + y[n-2] = y[n-1] + y = y[:n-1] + } + } + + b = append(b, y...) + case *objects.Int: + b = strconv.AppendInt(b, o.Value, 10) + case *objects.String: + b = strconv.AppendQuote(b, o.Value) + case *objects.Time: + y, err := o.Value.MarshalJSON() + if err != nil { + return nil, err + } + b = append(b, y...) + case *objects.Undefined: + b = append(b, "null"...) 
+ default: + // unknown type: ignore + } + + return b, nil +} diff --git a/stdlib/json/json_test.go b/stdlib/json/json_test.go new file mode 100644 index 00000000..e20aed05 --- /dev/null +++ b/stdlib/json/json_test.go @@ -0,0 +1,109 @@ +package json_test + +import ( + gojson "encoding/json" + "testing" + + "github.com/d5/tengo/assert" + "github.com/d5/tengo/objects" + "github.com/d5/tengo/stdlib/json" +) + +type ARR = []interface{} +type MAP = map[string]interface{} + +func TestJSON(t *testing.T) { + testJSONEncodeDecode(t, nil) + + testJSONEncodeDecode(t, 0) + testJSONEncodeDecode(t, 1) + testJSONEncodeDecode(t, -1) + testJSONEncodeDecode(t, 1984) + testJSONEncodeDecode(t, -1984) + + testJSONEncodeDecode(t, 0.0) + testJSONEncodeDecode(t, 1.0) + testJSONEncodeDecode(t, -1.0) + testJSONEncodeDecode(t, 19.84) + testJSONEncodeDecode(t, -19.84) + + testJSONEncodeDecode(t, "") + testJSONEncodeDecode(t, "foo") + testJSONEncodeDecode(t, "foo bar") + testJSONEncodeDecode(t, "foo \"bar\"") + + testJSONEncodeDecode(t, true) + testJSONEncodeDecode(t, false) + + testJSONEncodeDecode(t, ARR{}) + testJSONEncodeDecode(t, ARR{0}) + testJSONEncodeDecode(t, ARR{false}) + testJSONEncodeDecode(t, ARR{1, 2, 3, "four", false}) + testJSONEncodeDecode(t, ARR{1, 2, 3, "four", false, MAP{"a": 0, "b": "bee", "bool": true}}) + + testJSONEncodeDecode(t, MAP{}) + testJSONEncodeDecode(t, MAP{"a": 0}) + testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee"}) + testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "bool": true}) + + testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, "four"}}) + testJSONEncodeDecode(t, MAP{"a": 0, "b": "bee", "arr": ARR{1, 2, 3, MAP{"a": false, "b": 109.4}}}) +} + +func TestDecode(t *testing.T) { + testDecodeError(t, `{`) + testDecodeError(t, `}`) + testDecodeError(t, `{}a`) + testDecodeError(t, `{{}`) + testDecodeError(t, `{}}`) + testDecodeError(t, `[`) + testDecodeError(t, `]`) + testDecodeError(t, `[]a`) + testDecodeError(t, `[[]`) + testDecodeError(t, 
`[]]`) + testDecodeError(t, `"`) + testDecodeError(t, `"abc`) + testDecodeError(t, `abc"`) + testDecodeError(t, `.123`) + testDecodeError(t, `123.`) + testDecodeError(t, `1.2.3`) + testDecodeError(t, `'a'`) + testDecodeError(t, `true, false`) + testDecodeError(t, `{"a:"b"}`) + testDecodeError(t, `{a":"b"}`) + testDecodeError(t, `{"a":"b":"c"}`) +} + +func testDecodeError(t *testing.T, input string) { + _, err := json.Decode([]byte(input)) + assert.Error(t, err) +} + +func testJSONEncodeDecode(t *testing.T, v interface{}) bool { + o, err := objects.FromInterface(v) + if !assert.NoError(t, err) { + return false + } + + b, err := json.Encode(o) + if !assert.NoError(t, err) { + return false + } + + a, err := json.Decode(b) + if !assert.NoError(t, err, string(b)) { + return false + } + + vj, err := gojson.Marshal(v) + if !assert.NoError(t, err) { + return false + } + + aj, err := gojson.Marshal(objects.ToInterface(a)) + if !assert.NoError(t, err) { + return false + } + + return assert.Equal(t, vj, aj) +} diff --git a/stdlib/json/scanner.go b/stdlib/json/scanner.go new file mode 100644 index 00000000..8fc6776d --- /dev/null +++ b/stdlib/json/scanner.go @@ -0,0 +1,559 @@ +// A modified version of Go's JSON implementation. + +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package json + +import "strconv" + +func checkValid(data []byte, scan *scanner) error { + scan.reset() + for _, c := range data { + scan.bytes++ + if scan.step(scan, c) == scanError { + return scan.err + } + } + if scan.eof() == scanError { + return scan.err + } + return nil +} + +// A SyntaxError is a description of a JSON syntax error. +type SyntaxError struct { + msg string // description of error + Offset int64 // error occurred after reading Offset bytes +} + +func (e *SyntaxError) Error() string { return e.msg } + +// A scanner is a JSON scanning state machine. 
+// Callers call scan.reset() and then pass bytes in one at a time
+// by calling scan.step(&scan, c) for each byte.
+// The return value, referred to as an opcode, tells the
+// caller about significant parsing events like beginning
+// and ending literals, objects, and arrays, so that the
+// caller can follow along if it wishes.
+// The return value scanEnd indicates that a single top-level
+// JSON value has been completed, *before* the byte that
+// just got passed in. (The indication must be delayed in order
+// to recognize the end of numbers: is 123 a whole value or
+// the beginning of 12345e+6?).
+type scanner struct {
+	// The step is a func to be called to execute the next transition.
+	// Also tried using an integer constant and a single func
+	// with a switch, but using the func directly was 10% faster
+	// on a 64-bit Mac Mini, and it's nicer to read.
+	step func(*scanner, byte) int
+
+	// Reached end of top-level value.
+	endTop bool
+
+	// Stack of what we're in the middle of - array values, object keys, object values.
+	parseState []int
+
+	// Error that happened, if any.
+	err error
+
+	// Total bytes consumed. In this package checkValid increments it once
+	// per input byte, and it is recorded as the Offset of a SyntaxError.
+	bytes int64
+}
+
+// These values are returned by the state transition functions
+// assigned to scanner.step and the method scanner.eof.
+// They give details about the current state of the scan that
+// callers might be interested to know about.
+// It is okay to ignore the return value of any particular
+// call to scanner.step: if one call returns scanError,
+// every subsequent call will return scanError too.
+const (
+	// Continue.
+	scanContinue     = iota // uninteresting byte
+	scanBeginLiteral        // end implied by next result != scanContinue
+	scanBeginObject         // begin object
+	scanObjectKey           // just finished object key (string)
+	scanObjectValue         // just finished non-last object value
+	scanEndObject           // end object (implies scanObjectValue if possible)
+	scanBeginArray          // begin array
+	scanArrayValue          // just finished array value
+	scanEndArray            // end array (implies scanArrayValue if possible)
+	scanSkipSpace           // space byte; can skip; known to be last "continue" result
+
+	// Stop.
+	scanEnd   // top-level value ended *before* this byte; known to be first "stop" result
+	scanError // hit an error, scanner.err.
+)
+
+// These values are stored in the parseState stack.
+// They give the current state of a composite value
+// being scanned. If the parser is inside a nested value
+// the parseState describes the nested state, outermost at entry 0.
+const (
+	parseObjectKey   = iota // parsing object key (before colon)
+	parseObjectValue        // parsing object value (after colon)
+	parseArrayValue         // parsing array value
+)
+
+// reset prepares the scanner for use.
+// It must be called before calling s.step.
+func (s *scanner) reset() {
+	s.step = stateBeginValue
+	// Truncate the stack in place so its backing array is reused.
+	s.parseState = s.parseState[0:0]
+	s.err = nil
+	s.endTop = false
+}
+
+// eof tells the scanner that the end of input has been reached.
+// It returns a scan status just as s.step does: scanEnd if a complete
+// top-level value was seen, scanError otherwise (s.err is then set).
+func (s *scanner) eof() int {
+	if s.err != nil {
+		return scanError
+	}
+	if s.endTop {
+		return scanEnd
+	}
+	// Feed one space: a value that is still waiting for a terminating byte
+	// (such as a top-level number) completes on it and sets endTop.
+	s.step(s, ' ')
+	if s.endTop {
+		return scanEnd
+	}
+	// Keep an error already recorded by the step above; otherwise the
+	// input simply stopped in the middle of a value.
+	if s.err == nil {
+		s.err = &SyntaxError{"unexpected end of JSON input", s.bytes}
+	}
+	return scanError
+}
+
+// pushParseState pushes a new parse state p onto the parse stack.
+func (s *scanner) pushParseState(p int) {
+	s.parseState = append(s.parseState, p)
+}
+
+// popParseState pops a parse state (already obtained) off the stack
+// and updates s.step accordingly.
+func (s *scanner) popParseState() {
+	depth := len(s.parseState) - 1
+	s.parseState = s.parseState[:depth]
+	if depth > 0 {
+		// Still inside an enclosing array or object.
+		s.step = stateEndValue
+		return
+	}
+	// The stack is empty: the top-level value is complete.
+	s.step = stateEndTop
+	s.endTop = true
+}
+
+// isSpace reports whether c is a space, tab, carriage return or newline.
+func isSpace(c byte) bool {
+	switch c {
+	case ' ', '\t', '\r', '\n':
+		return true
+	}
+	return false
+}
+
+// stateBeginValueOrEmpty is the state right after `[`: either the array
+// closes immediately or its first element begins.
+func stateBeginValueOrEmpty(s *scanner, c byte) int {
+	switch {
+	case c <= ' ' && isSpace(c):
+		return scanSkipSpace
+	case c == ']':
+		return stateEndValue(s, c)
+	default:
+		return stateBeginValue(s, c)
+	}
+}
+
+// stateBeginValue is the state wherever a value may start, including the
+// very beginning of the input.
+func stateBeginValue(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
+		return scanSkipSpace
+	}
+
+	// Composite values push a parse state and report their own opcode.
+	switch c {
+	case '{':
+		s.step = stateBeginStringOrEmpty
+		s.pushParseState(parseObjectKey)
+		return scanBeginObject
+	case '[':
+		s.step = stateBeginValueOrEmpty
+		s.pushParseState(parseArrayValue)
+		return scanBeginArray
+	}
+
+	// Anything else has to be the first byte of a literal.
+	var next func(*scanner, byte) int
+	switch c {
+	case '"':
+		next = stateInString
+	case '-':
+		next = stateNeg
+	case '0': // beginning of 0.123
+		next = state0
+	case '1', '2', '3', '4', '5', '6', '7', '8', '9': // beginning of 1234.5
+		next = state1
+	case 't': // beginning of true
+		next = stateT
+	case 'f': // beginning of false
+		next = stateF
+	case 'n': // beginning of null
+		next = stateN
+	default:
+		return s.error(c, "looking for beginning of value")
+	}
+	s.step = next
+	return scanBeginLiteral
+}
+
+// stateBeginStringOrEmpty is the state right after `{`: either the object
+// closes immediately or its first key begins.
+func stateBeginStringOrEmpty(s *scanner, c byte) int {
+	if c <= ' ' && isSpace(c) {
+		return scanSkipSpace
+	}
+	if c != '}' {
+		return stateBeginString(s, c)
+	}
+	// Empty object: mark the frame as "after a value" so that
+	// stateEndValue accepts the closing brace.
+	s.parseState[len(s.parseState)-1] = parseObjectValue
+	return stateEndValue(s, c)
+}
+
+// stateBeginString is the state after reading `{"key": value,`.
+func stateBeginString(s *scanner, c byte) int {
+	switch {
+	case c <= ' ' && isSpace(c):
+		return scanSkipSpace
+	case c == '"':
+		s.step = stateInString
+		return scanBeginLiteral
+	}
+	return s.error(c, "looking for beginning of object key string")
+}
+
+// stateEndValue is the state once a value has been completed, for example
+// after `{}`, `true` or `["x"`. What may follow depends on the innermost
+// entry of the parse stack.
+func stateEndValue(s *scanner, c byte) int {
+	depth := len(s.parseState)
+	if depth == 0 {
+		// Completed top-level before the current byte.
+		s.step = stateEndTop
+		s.endTop = true
+		return stateEndTop(s, c)
+	}
+	if c <= ' ' && isSpace(c) {
+		s.step = stateEndValue
+		return scanSkipSpace
+	}
+
+	top := &s.parseState[depth-1]
+	switch *top {
+	case parseObjectKey:
+		if c != ':' {
+			return s.error(c, "after object key")
+		}
+		*top = parseObjectValue
+		s.step = stateBeginValue
+		return scanObjectKey
+
+	case parseObjectValue:
+		switch c {
+		case ',':
+			*top = parseObjectKey
+			s.step = stateBeginString
+			return scanObjectValue
+		case '}':
+			s.popParseState()
+			return scanEndObject
+		}
+		return s.error(c, "after object key:value pair")
+
+	case parseArrayValue:
+		switch c {
+		case ',':
+			s.step = stateBeginValue
+			return scanArrayValue
+		case ']':
+			s.popParseState()
+			return scanEndArray
+		}
+		return s.error(c, "after array element")
+	}
+	return s.error(c, "")
+}
+
+// stateEndTop is the state once the top-level value is finished, for
+// example after `{}` or `[1,2,3]`. Only whitespace may follow.
+func stateEndTop(s *scanner, c byte) int {
+	if isSpace(c) {
+		return scanEnd
+	}
+	// Complain about non-space byte on next call.
+	s.error(c, "after top-level value")
+	return scanEnd
+}
+
+// stateInString is the state after reading `"`.
+func stateInString(s *scanner, c byte) int {
+	switch {
+	case c == '"':
+		s.step = stateEndValue
+	case c == '\\':
+		s.step = stateInStringEsc
+	case c < 0x20:
+		return s.error(c, "in string literal")
+	}
+	return scanContinue
+}
+
+// stateInStringEsc is the state after a backslash inside a quoted string
+// (input so far looks like `"\`).
+func stateInStringEsc(s *scanner, c byte) int {
+	if c == 'u' {
+		s.step = stateInStringEscU
+		return scanContinue
+	}
+	switch c {
+	case 'b', 'f', 'n', 'r', 't', '\\', '/', '"':
+		s.step = stateInString
+		return scanContinue
+	}
+	return s.error(c, "in string escape code")
+}
+
+// stateInStringEscU is the state after `"\u` inside a quoted string; the
+// first of four hexadecimal digits is expected.
+func stateInStringEscU(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+		s.step = stateInStringEscU1
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU1 is the state after `"\u1` inside a quoted string; the
+// second hexadecimal digit is expected.
+func stateInStringEscU1(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+		s.step = stateInStringEscU12
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU12 is the state after `"\u12` inside a quoted string; the
+// third hexadecimal digit is expected.
+func stateInStringEscU12(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+		s.step = stateInStringEscU123
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.
+func stateInStringEscU123(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
+		s.step = stateInString
+		return scanContinue
+	}
+	// numbers
+	return s.error(c, "in \\u hexadecimal character escape")
+}
+
+// stateNeg is the state after the leading `-` of a number.
+func stateNeg(s *scanner, c byte) int {
+	switch {
+	case c == '0':
+		s.step = state0
+	case '1' <= c && c <= '9':
+		s.step = state1
+	default:
+		return s.error(c, "in numeric literal")
+	}
+	return scanContinue
+}
+
+// state1 is the state inside an integer part that started with a non-zero
+// digit, such as after `1` or `100` but not `0`.
+func state1(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		// No more integer digits: continue as after a lone `0`.
+		return state0(s, c)
+	}
+	s.step = state1
+	return scanContinue
+}
+
+// state0 is the state after the integer part `0` of a number.
+func state0(s *scanner, c byte) int {
+	switch c {
+	case '.':
+		s.step = stateDot
+	case 'e', 'E':
+		s.step = stateE
+	default:
+		return stateEndValue(s, c)
+	}
+	return scanContinue
+}
+
+// stateDot is the state after the integer part and the decimal point of a
+// number, such as after `1.`; at least one digit must follow.
+func stateDot(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return s.error(c, "after decimal point in numeric literal")
+	}
+	s.step = stateDot0
+	return scanContinue
+}
+
+// stateDot0 is the state after the integer part, the decimal point and one
+// or more fraction digits, such as after `3.14`.
+func stateDot0(s *scanner, c byte) int {
+	switch {
+	case '0' <= c && c <= '9':
+		return scanContinue
+	case c == 'e' || c == 'E':
+		s.step = stateE
+		return scanContinue
+	}
+	return stateEndValue(s, c)
+}
+
+// stateE is the state after reading the mantissa and e in a number,
+// such as after reading `314e` or `0.314e`.
+func stateE(s *scanner, c byte) int {
+	switch c {
+	case '+', '-':
+		s.step = stateESign
+		return scanContinue
+	}
+	// No sign: the byte must already be the first exponent digit.
+	return stateESign(s, c)
+}
+
+// stateESign is the state after the mantissa, the e and a sign,
+// such as after `314e-` or `0.314e+`; a digit must follow.
+func stateESign(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return s.error(c, "in exponent of numeric literal")
+	}
+	s.step = stateE0
+	return scanContinue
+}
+
+// stateE0 is the state after the mantissa, the e, an optional sign and at
+// least one exponent digit, such as after `314e-2`, `0.314e+1` or `3.14e0`.
+func stateE0(s *scanner, c byte) int {
+	if c < '0' || c > '9' {
+		return stateEndValue(s, c)
+	}
+	return scanContinue
+}
+
+// stateT is the state after `t`.
+func stateT(s *scanner, c byte) int {
+	if c != 'r' {
+		return s.error(c, "in literal true (expecting 'r')")
+	}
+	s.step = stateTr
+	return scanContinue
+}
+
+// stateTr is the state after `tr`.
+func stateTr(s *scanner, c byte) int {
+	if c != 'u' {
+		return s.error(c, "in literal true (expecting 'u')")
+	}
+	s.step = stateTru
+	return scanContinue
+}
+
+// stateTru is the state after `tru`.
+func stateTru(s *scanner, c byte) int {
+	if c != 'e' {
+		return s.error(c, "in literal true (expecting 'e')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateF is the state after `f`.
+func stateF(s *scanner, c byte) int {
+	if c != 'a' {
+		return s.error(c, "in literal false (expecting 'a')")
+	}
+	s.step = stateFa
+	return scanContinue
+}
+
+// stateFa is the state after `fa`.
+func stateFa(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal false (expecting 'l')")
+	}
+	s.step = stateFal
+	return scanContinue
+}
+
+// stateFal is the state after reading `fal`.
+func stateFal(s *scanner, c byte) int {
+	if c != 's' {
+		return s.error(c, "in literal false (expecting 's')")
+	}
+	s.step = stateFals
+	return scanContinue
+}
+
+// stateFals handles the byte that follows `fals`.
+func stateFals(s *scanner, c byte) int {
+	if c != 'e' {
+		return s.error(c, "in literal false (expecting 'e')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateN handles the byte that follows `n`.
+func stateN(s *scanner, c byte) int {
+	if c != 'u' {
+		return s.error(c, "in literal null (expecting 'u')")
+	}
+	s.step = stateNu
+	return scanContinue
+}
+
+// stateNu handles the byte that follows `nu`.
+func stateNu(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal null (expecting 'l')")
+	}
+	s.step = stateNul
+	return scanContinue
+}
+
+// stateNul handles the byte that follows `nul`.
+func stateNul(s *scanner, c byte) int {
+	if c != 'l' {
+		return s.error(c, "in literal null (expecting 'l')")
+	}
+	s.step = stateEndValue
+	return scanContinue
+}
+
+// stateError is the step used once a syntax error has been seen,
+// for example after reading `[1}` or `5.1.2`.
+func stateError(s *scanner, c byte) int {
+	return scanError // the step is left unchanged, so later bytes are rejected too
+}
+
+// error records a SyntaxError for c and moves the scanner to the error state.
+func (s *scanner) error(c byte, context string) int {
+	s.step = stateError
+	s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes}
+	return scanError
+}
+
+// quoteChar formats c as a quoted character literal
+func quoteChar(c byte) string {
+	// special cases - different from quoted strings
+	if c == '\'' {
+		return `'\''`
+	}
+	if c == '"' {
+		return `'"'`
+	}
+
+	// use quoted string with different quotation marks
+	s := strconv.Quote(string(c))
+	return "'" + s[1:len(s)-1] + "'"
+}
diff --git a/stdlib/json_test.go b/stdlib/json_test.go
index 337f5fea..08a08583 100644
--- a/stdlib/json_test.go
+++ b/stdlib/json_test.go
@@ -3,31 +3,43 @@ package stdlib_test
 import "testing"
 
 func TestJSON(t *testing.T) {
-	module(t, "json").call("stringify", 5).expect("5")
-	module(t, "json").call("stringify", "foobar").expect(`"foobar"`)
-	module(t, "json").call("stringify", MAP{"foo": 5}).expect("{\"foo\":5}")
-	module(t, "json").call("stringify", IMAP{"foo": 5}).expect("{\"foo\":5}")
-	module(t, "json").call("stringify", ARR{1, 2, 3}).expect("[1,2,3]")
-	module(t, "json").call("stringify", IARR{1, 2, 3}).expect("[1,2,3]")
-	module(t, "json").call("stringify", MAP{"foo": "bar"}).expect("{\"foo\":\"bar\"}")
-	module(t, "json").call("stringify", MAP{"foo": 1.8}).expect("{\"foo\":1.8}")
-	module(t, "json").call("stringify", MAP{"foo": true}).expect("{\"foo\":true}")
-	module(t, "json").call("stringify", MAP{"foo": '8'}).expect("{\"foo\":56}")
-	module(t, "json").call("stringify", MAP{"foo": []byte("foo")}).expect("{\"foo\":\"Zm9v\"}") // json encoding returns []byte as base64 encoded string
-	module(t, "json").call("stringify", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}")
-	module(t, "json").call("stringify", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect("{\"foo\":[\"bar\",1,1.8,56,true]}")
-	module(t, "json").call("stringify", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect("{\"foo\":[[\"bar\",1],[\"bar\",1]]}")
-	module(t, "json").call("stringify", MAP{"foo": MAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}")
-	module(t, "json").call("stringify", MAP{"foo": IMAP{"string": "bar", "int": 1, "float": 1.8, "char": '8', "bool": true}}).expect("{\"foo\":{\"bool\":true,\"char\":56,\"float\":1.8,\"int\":1,\"string\":\"bar\"}}")
-	module(t, "json").call("stringify", MAP{"foo": MAP{"map1": MAP{"string": "bar"}, "map2": MAP{"int": "1"}}}).expect("{\"foo\":{\"map1\":{\"string\":\"bar\"},\"map2\":{\"int\":\"1\"}}}")
-	module(t, "json").call("stringify", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect("[[\"bar\",1],[\"bar\",1]]")
+	module(t, "json").call("encode", 5).expect([]byte("5"))
+	module(t, "json").call("encode", "foobar").expect([]byte(`"foobar"`))
+	module(t, "json").call("encode", MAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
+	module(t, "json").call("encode", IMAP{"foo": 5}).expect([]byte("{\"foo\":5}"))
+	module(t, "json").call("encode", ARR{1, 2, 3}).expect([]byte("[1,2,3]"))
+	module(t, "json").call("encode", IARR{1, 2, 3}).expect([]byte("[1,2,3]"))
+	module(t, "json").call("encode", MAP{"foo": "bar"}).expect([]byte("{\"foo\":\"bar\"}"))
+	module(t, "json").call("encode", MAP{"foo": 1.8}).expect([]byte("{\"foo\":1.8}"))
+	module(t, "json").call("encode", MAP{"foo": true}).expect([]byte("{\"foo\":true}"))
+	module(t, "json").call("encode", MAP{"foo": '8'}).expect([]byte("{\"foo\":56}"))
+	module(t, "json").call("encode", MAP{"foo": []byte("foo")}).expect([]byte("{\"foo\":\"Zm9v\"}")) // json encoding returns []byte as base64 encoded string
+	module(t, "json").call("encode", MAP{"foo": ARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
+	module(t, "json").call("encode", MAP{"foo": IARR{"bar", 1, 1.8, '8', true}}).expect([]byte("{\"foo\":[\"bar\",1,1.8,56,true]}"))
+	module(t, "json").call("encode", MAP{"foo": ARR{ARR{"bar", 1}, ARR{"bar", 1}}}).expect([]byte("{\"foo\":[[\"bar\",1],[\"bar\",1]]}"))
+	module(t, "json").call("encode", MAP{"foo": MAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
+	module(t, "json").call("encode", MAP{"foo": IMAP{"string": "bar"}}).expect([]byte("{\"foo\":{\"string\":\"bar\"}}"))
+	module(t, "json").call("encode", MAP{"foo": MAP{"map1": MAP{"string": "bar"}}}).expect([]byte("{\"foo\":{\"map1\":{\"string\":\"bar\"}}}"))
+	module(t, "json").call("encode", ARR{ARR{"bar", 1}, ARR{"bar", 1}}).expect([]byte("[[\"bar\",1],[\"bar\",1]]"))
 
-	module(t, "json").call("parse", `5`).expect(5.0)
-	module(t, "json").call("parse", `"foo"`).expect("foo")
-	module(t, "json").call("parse", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"})
-	module(t, "json").call("parse", `{"foo":5}`).expect(MAP{"foo": 5.0})
-	module(t, "json").call("parse", `{"foo":2.5}`).expect(MAP{"foo": 2.5})
-	module(t, "json").call("parse", `{"foo":true}`).expect(MAP{"foo": true})
-	module(t, "json").call("parse", `{"foo":"bar"}`).expect(MAP{"foo": "bar"})
-	module(t, "json").call("parse", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}})
+	module(t, "json").call("decode", `5`).expect(5.0)
+	module(t, "json").call("decode", `"foo"`).expect("foo")
+	module(t, "json").call("decode", `[1,2,3,"bar"]`).expect(ARR{1.0, 2.0, 3.0, "bar"})
+	module(t, "json").call("decode", `{"foo":5}`).expect(MAP{"foo": 5.0})
+	module(t, "json").call("decode", `{"foo":2.5}`).expect(MAP{"foo": 2.5})
+	module(t, "json").call("decode", `{"foo":true}`).expect(MAP{"foo": true})
+	module(t, "json").call("decode", `{"foo":"bar"}`).expect(MAP{"foo": "bar"})
+	module(t, "json").call("decode", `{"foo":[1,2,3,"bar"]}`).expect(MAP{"foo": ARR{1.0, 2.0, 3.0, "bar"}})
+
+	module(t, "json").call("indent", []byte("{\"foo\":[\"bar\",1,1.8,56,true]}"), "", " ").expect([]byte(`{
+ "foo": [
+  "bar",
+  1,
+  1.8,
+  56,
+  true
+ ]
+}`))
+
+	module(t, "json").call("html_escape", []byte(`{"M":"<html>foo &`+"\xe2\x80\xa8 \xe2\x80\xa9"+`</html>"}`)).expect([]byte(`{"M":"\u003chtml\u003efoo \u0026\u2028 \u2029\u003c/html\u003e"}`)) // <, >, & and U+2028/U+2029 must come back as \u escapes
 }