Skip to content

Commit

Permalink
add parsing that requires JSON objects specifically
Browse files Browse the repository at this point in the history
  • Loading branch information
mumbleskates committed Mar 18, 2024
1 parent b99ebe3 commit 814fdd6
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 62 deletions.
18 changes: 18 additions & 0 deletions decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ func Unmarshal(b []byte) (any, error) {
return val, p.CheckEmpty()
}

// UnmarshalObject decodes a JSON representation from b, requiring the
// top-level value to be a JSON object. A parse failure yields a nil map and
// the parse error. After a successful parse the rest of the input is checked
// with CheckEmpty, and that check's error (if any) is returned together with
// the parsed object.
func UnmarshalObject(b []byte) (map[string]any, error) {
	pr := NewParserFromSlice(b)
	obj, parseErr := pr.ParseObject()
	if parseErr != nil {
		return nil, parseErr
	}
	// Anything left over after the object must pass the parser's emptiness
	// check; its result becomes the error half of the return.
	if trailingErr := pr.CheckEmpty(); trailingErr != nil {
		return obj, trailingErr
	}
	return obj, nil
}

// UnmarshalString decodes a JSON representation from b as a generic
// value: int64, float64, string, bool, nil, []any, or map[string]any.
func UnmarshalString(s string) (any, error) {
Expand All @@ -21,3 +30,12 @@ func UnmarshalString(s string) (any, error) {
}
return val, p.CheckEmpty()
}

// UnmarshalObjectString decodes a JSON representation from s, requiring the
// top-level value to be a JSON object. A parse failure yields a nil map and
// the parse error. After a successful parse the rest of the input is checked
// with CheckEmpty, and that check's error (if any) is returned together with
// the parsed object.
func UnmarshalObjectString(s string) (map[string]any, error) {
	pr := NewParserFromString(s)
	obj, parseErr := pr.ParseObject()
	if parseErr != nil {
		return nil, parseErr
	}
	// Anything left over after the object must pass the parser's emptiness
	// check; its result becomes the error half of the return.
	if trailingErr := pr.CheckEmpty(); trailingErr != nil {
		return obj, trailingErr
	}
	return obj, nil
}
6 changes: 6 additions & 0 deletions json_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,9 @@ func TestControlCharacters(t *testing.T) {
assert.ErrorContains(t, err, "simple json: control character, tab, or newline in string value")
}
}

// TestParseObject checks that an object surrounded by whitespace decodes to
// the expected map through the object-only entry point.
func TestParseObject(t *testing.T) {
	val, err := UnmarshalObjectString(` {"a": 1 } `)
	require.NoError(t, err)
	// testify's Equal takes (expected, actual); keeping that order makes a
	// failure report label the two values correctly.
	assert.Equal(t, map[string]any{"a": int64(1)}, val)
}
170 changes: 108 additions & 62 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ type Parser interface {
// and return it. If the data is empty, the exact error io.EOF will be
// returned.
Parse() (any, error)
// ParseObject parses JSON from the front of the contained data as a
// simply-typed JSON object and returns it. If the JSON is a value of a type
// other than object, an error will be returned. If the data is empty, the
// exact error io.EOF will be returned.
ParseObject() (map[string]any, error)
// NextLine consumes whitespace up to the next newline, returning an error
// if something other than whitespace exists before the next newline, or
// returning the exact error io.EOF if the end of data is found first. This
Expand All @@ -67,6 +72,16 @@ type Parser interface {
//
// See: https://go.dev/wiki/RangefuncExperiment
IterLines() func(func(any, error) bool)
// IterObjectLines returns a Range-func iterable for reading JSONL,
// enforcing that each line must also be a JSON object rather than another
// kind of JSON value.
//
// Returns an iterable go1.22+ RangeFunc that yields the object parsed from
// each line of JSONL data until the end of data. If an error occurs, it will
// be yielded on its own and iteration will stop.
//
// See: https://go.dev/wiki/RangefuncExperiment
IterObjectLines() func(func(map[string]any, error) bool)
// CheckEmpty checks that the remaining data is all whitespace, returning an
// error if not.
CheckEmpty() error
Expand Down Expand Up @@ -556,6 +571,14 @@ func (p *parser) Parse() (val any, err error) {
return p.doParse(maxDepth)
}

// ParseObject skips any leading whitespace and then parses a JSON object from
// the front of the remaining data, up to maxDepth levels of nesting. An error
// from skipping whitespace is returned unchanged. On any error the returned
// map is nil, so callers never receive a partially populated object together
// with an error.
func (p *parser) ParseObject() (map[string]any, error) {
	if err := p.skipSpaces(); err != nil {
		return nil, err
	}
	obj, err := p.doParseObject(maxDepth)
	if err != nil {
		// doParseObject uses naked returns on several of its error paths,
		// which leave already-parsed entries in its result. Drop them here,
		// just as doParse clears its value when an error occurred.
		return nil, err
	}
	return obj, nil
}

func (p *parser) doParse(remainingDepth int) (val any, err error) {
if remainingDepth < 0 {
return nil, errMaxDepth
Expand Down Expand Up @@ -622,78 +645,80 @@ func (p *parser) doParse(remainingDepth int) (val any, err error) {
}
val = arr
case objectTy:
// TODO(kent): break out parse object and parse array so we can force
// decoding those types specifically since that's often desired
// Consume the beginning of the map
err = p.consumeObjectBegin()
val, err = p.doParseObject(remainingDepth)
case commaSym:
return nil, errUnexpectedComma
case endGroupSym:
return nil, errUnexpectedEnd
case unknownTy:
panic("unreachable")
}
if err != nil {
val = nil
}
return
}

func (p *parser) doParseObject(remainingDepth int) (obj map[string]any, err error) {
// Consume the beginning of the map
err = p.consumeObjectBegin()
if err != nil {
return nil, err
}
for {
var ty valType
ty, err = p.parseType()
if err != nil {
return nil, err
return
}
var obj map[string]any
for {
ty, err = p.parseType()
if ty == endGroupSym {
// Found an ending brace/bracket immediately after the start of
// the object or one of its items, cleanly ending the object
err = p.consumeObjectEnd()
if err != nil {
return
}
if ty == endGroupSym {
// Found an ending brace/bracket immediately after the start of
// the object or one of its items, cleanly ending the object
err = p.consumeObjectEnd()
if err != nil {
return
}
break
} else if obj == nil {
if ty == commaSym {
// Found a comma with no previous value
return nil, errUnexpectedComma
}
// Initialize the object's map
obj = make(map[string]any)
} else {
// We just parsed an item and the object hasn't ended. We MUST
// find a comma next.
err = p.consumeComma()
if err != nil {
return
}
}
// We now have a regular following item, not an errant comma or the
// end of the object.
var objKeyBytes []byte
var objVal any
err = p.skipSpaces()
if err != nil {
return
}
// Read the map key, which MUST be a string.
objKeyBytes, err = p.parseString()
if err != nil {
return
}
objKey := string(objKeyBytes)
// Consume the ':' separating the key and value
err = p.consumeColon()
if err != nil {
return
break
} else if obj == nil {
if ty == commaSym {
// Found a comma with no previous value
return nil, errUnexpectedComma
}
// Read the value, which may be of any type.
objVal, err = p.doParse(remainingDepth - 1)
// Initialize the object's map
obj = make(map[string]any)
} else {
// We just parsed an item and the object hasn't ended. We MUST
// find a comma next.
err = p.consumeComma()
if err != nil {
return
}
obj[objKey] = objVal
}
val = obj
case commaSym:
return nil, errUnexpectedComma
case endGroupSym:
return nil, errUnexpectedEnd
case unknownTy:
panic("unreachable")
}
if err != nil {
val = nil
// We now have a regular following item, not an errant comma or the
// end of the object.
var objKeyBytes []byte
var objVal any
err = p.skipSpaces()
if err != nil {
return
}
// Read the map key, which MUST be a string.
objKeyBytes, err = p.parseString()
if err != nil {
return
}
objKey := string(objKeyBytes)
// Consume the ':' separating the key and value
err = p.consumeColon()
if err != nil {
return
}
// Read the value, which may be of any type.
objVal, err = p.doParse(remainingDepth - 1)
if err != nil {
return
}
obj[objKey] = objVal
}
return
}
Expand Down Expand Up @@ -746,6 +771,27 @@ func (p *parser) IterLines() func(func(any, error) bool) {
}
}

// IterObjectLines returns a range-func iterator over the remaining data,
// parsing one JSON object per line with ParseObject and advancing with
// NextLine between objects.
//
// Iteration ends silently when ParseObject or NextLine reports the exact
// error io.EOF. Any other error is yielded once, on its own with a nil map,
// and iteration stops. Iteration also stops as soon as yield returns false.
func (p *parser) IterObjectLines() func(func(map[string]any, error) bool) {
	return func(yield func(map[string]any, error) bool) {
		for {
			obj, err := p.ParseObject()
			if err == io.EOF {
				// Clean end of data: finish without yielding anything.
				return
			}
			if err != nil {
				// Never hand the consumer a partially parsed object next to
				// an error; the error is yielded alone and ends iteration.
				yield(nil, err)
				return
			}
			if !yield(obj, nil) {
				// The consumer asked to stop.
				return
			}
			if err := p.NextLine(); err != nil {
				if err != io.EOF {
					yield(nil, err)
				}
				return
			}
		}
	}
}

func (p *parser) CheckEmpty() error {
err := p.skipSpaces()
if err != nil {
Expand Down

0 comments on commit 814fdd6

Please sign in to comment.