Skip to content

Commit

Permalink
Optimize YAML parser helper functions to use less stack (#2557)
Browse files Browse the repository at this point in the history
  • Loading branch information
kasperl authored Oct 4, 2024
1 parent 5246d2d commit 803405a
Showing 1 changed file with 50 additions and 66 deletions.
116 changes: 50 additions & 66 deletions lib/encoding/yaml/parser.toit
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,7 @@ abstract class PegParserBase_:
null and false are considered false values, everything else is true.
*/
lookahead [block] -> bool:
result/any := false
try-parse: result = block.call
result := try-parse block
return not (not result) // Boolify the result.
/**
Expand Down Expand Up @@ -295,18 +294,20 @@ abstract class PegParserBase_:
Use this for *- and +-productions.
*/
repeat --at-least-one/bool=false [block] -> List?:
try-parse:
result := []
while true:
mark := mark
res := block.call
if not res:
rollback mark
break
if at-mark mark: break // No progress, so matched empty. This should terminate the loop.
result.add res
if not at-least-one or not result.is-empty: return result
return null
result := []
while true:
rollback-mark := mark
element := block.call
if not element:
rollback rollback-mark
break
if at-mark rollback-mark:
// No progress, so matched empty. This should terminate the loop.
break
result.add element
return not at-least-one or not result.is-empty
? result
: null

/**
Calls $block and returns its value if it is truthful, consuming the input.
Expand All @@ -316,10 +317,8 @@ abstract class PegParserBase_:
Use this for ?-productions.
*/
optional --or-null/bool=false [block] -> any:
try-parse:
if res := block.call: return res

return or-null ? null : EMPTY-NODE_
result := try-parse block
return result or (or-null ? null : EMPTY-NODE_)

can-read num/int -> bool:
return offset_ + num <= bytes_.size
Expand Down Expand Up @@ -362,6 +361,9 @@ class ParseResult_:
documents/List
constructor .documents:

class ParseError_:
message/string
constructor .message:

/**
A YAML parser based on https://yaml.org/spec/1.2.2.
Expand All @@ -385,14 +387,10 @@ class Parser_ extends PegParserBase_:
This mark is frequently in the "future".
*/
forbidden-mark/int? := null
error/string? := null

constructor bytes/ByteArray:
super bytes

set-error error/string:
if not this.error: this.error = error

can-read num/int -> bool:
super-result := super num
if not super-result: return false
Expand All @@ -401,26 +399,21 @@ class Parser_ extends PegParserBase_:
forbidden-distance := -(bytes-since-mark forbidden-mark)
return forbidden-distance >= num

as-bool val/any -> bool:
static as-bool val/any -> bool:
return not (not val)

try-parse --as-bool/bool=false [block] -> any:
// Error-aware try-parse to limit unnecessary work on parse errors and
// supports boolean conversion
result := ?
if error:
result = null
else:
result = super block

if as-bool: result = this.as-bool result
return result
// Variant of try-parse that converts to boolean.
try-parse --as-bool/True [block] -> bool:
rollback-mark := mark
result := block.call
if not result:
rollback rollback-mark
return false
return true

check-valid-key key/ValueNode_? -> ValueNode_?:
if not key: return null
if error_ := key.check-key:
set-error error_
return null
if error := key.check-key: throw (ParseError_ error)
return key

match-hex digits/int -> int?:
Expand Down Expand Up @@ -468,22 +461,21 @@ class Parser_ extends PegParserBase_:

// Overall structure.
l-yaml-stream [--on-error] -> any:
repeat: l-document-prefix
documents := []
if document := l-any-document: documents.add document
error/ParseError_? := catch --unwind=(: it is not ParseError_):
repeat: l-document-prefix
if document := l-any-document: documents.add document

// Modifies the documents list with any additional documents.
repeat: l-yaml-stream-helper documents
// Modifies the documents list with any additional documents.
repeat: l-yaml-stream-helper documents

if not l-eof:
set-error "INVALID_YAML_DOCUMENT"
if not l-eof:
throw (ParseError_ "INVALID_YAML_DOCUMENT")

parsed-value := documents.map: | node/ValueNode_ | node.resolve

if error:
return on-error.call error

return ParseResult_ parsed-value
return error
? on-error.call error.message
: ParseResult_ parsed-value

/**
Parses the remaining documents and comments from the stream.
Expand Down Expand Up @@ -564,11 +556,9 @@ class Parser_ extends PegParserBase_:
parts := version.split "."
major := int.parse parts[0]
minor := int.parse parts[1]
if major > 1 or minor > 2:
set-error "UNSUPPORTED_YAML_VERSION"
false
else:
true
// Only versions 1.0, 1.1, and 1.2 are supported.
if major != 1 or minor > 2: throw (ParseError_ "UNSUPPORTED_YAML_VERSION")
true

ns-tag-directive -> bool:
return try-parse --as-bool:
Expand Down Expand Up @@ -752,9 +742,7 @@ class Parser_ extends PegParserBase_:
try-parse:
key := ns-s-block-map-implicit-key or e-node and EMPTY-NODE_
if val := c-l-block-map-implicit-value n:
if error_ := key.check-key:
set-error error_
return null
if error := key.check-key: throw (ParseError_ error)
return [key, val]
return null

Expand Down Expand Up @@ -807,8 +795,7 @@ class Parser_ extends PegParserBase_:
c-ns-alias-node -> ValueNode_?:
if anchor := (try-parse: match-char C-ALIAS_ and ns-anchor-name):
if not named-nodes.contains anchor:
set-error "UNRESOLVED_ALIAS"
return null
throw (ParseError_ "UNRESOLVED_ALIAS")
return named-nodes[anchor]
return null

Expand Down Expand Up @@ -1420,10 +1407,9 @@ class Parser_ extends PegParserBase_:
// The spec does not mention anything about surrogates, but we assume that since YAML is an
// extension of JSON that 16-bit surrogates shoud be supported.
if part-2 := c-ns-esc-char:
if not 0xdc00 <= part-2 <= 0xdfff:
set-error "INVALID_SURROGATE_PAIR"
return null
return 0x10000 + ((res & 0x3ff) << 10) | (part-2 & 0x3ff)
if not 0xdc00 <= part-2 <= 0xdfff:
throw (ParseError_ "INVALID_SURROGATE_PAIR")
return 0x10000 + ((res & 0x3ff) << 10) | (part-2 & 0x3ff)
else:
return res
if match-char 'U': if res := match-hex 8: return res
Expand Down Expand Up @@ -1512,13 +1498,11 @@ class Parser_ extends PegParserBase_:
if match-buffer #[0xFE, 0xFF] or
match-buffer #[0xFF, 0xFE] or
match-char 0:
set-error "UNSUPPORTED_BYTE_ORDER"
return false
throw (ParseError_ "UNSUPPORTED_BYTE_ORDER")
try-parse:
match-any
if match-char 0:
set-error "UNSUPPORTED_BYTE_ORDER"
return false
throw (ParseError_ "UNSUPPORTED_BYTE_ORDER")
// Toit only supports UTF-8.
if can-read 3 and match-buffer #[0xEF, 0xBB, 0xBF]:
return true
Expand Down

0 comments on commit 803405a

Please sign in to comment.