Skip to content

Commit

Permalink
Merge pull request #2700 from ruby/lex-embdoc
Browse files (browse the repository at this point in the history)
Fix up embdoc lexing on EOF
  • Loading branch information
kddnewton authored Apr 12, 2024
2 parents f28393d + 8ee43be commit a0f2306
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 13 deletions.
28 changes: 20 additions & 8 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -9605,15 +9605,23 @@ lex_embdoc(pm_parser_t *parser) {
pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
if (comment == NULL) return PM_TOKEN_EOF;

// Now, loop until we find the end of the embedded documentation or the end of
// the file.
// Now, loop until we find the end of the embedded documentation or the end
// of the file.
while (parser->current.end + 4 <= parser->end) {
parser->current.start = parser->current.end;

// If we've hit the end of the embedded documentation then we'll return that
// token here.
if (memcmp(parser->current.end, "=end", 4) == 0 &&
(parser->current.end + 4 == parser->end || pm_char_is_whitespace(parser->current.end[4]))) {
// If we've hit the end of the embedded documentation then we'll return
// that token here.
if (
(memcmp(parser->current.end, "=end", 4) == 0) &&
(
(parser->current.end + 4 == parser->end) || // end of file
pm_char_is_whitespace(parser->current.end[4]) || // whitespace
(parser->current.end[4] == '\0') || // NUL or end of script
(parser->current.end[4] == '\004') || // ^D
(parser->current.end[4] == '\032') // ^Z
)
) {
const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);

if (newline == NULL) {
Expand Down Expand Up @@ -10425,9 +10433,13 @@ parser_lex(pm_parser_t *parser) {

// = => =~ == === =begin
case '=':
if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
if (
current_token_starts_line(parser) &&
(parser->current.end + 5 <= parser->end) &&
memcmp(parser->current.end, "begin", 5) == 0 &&
(pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
) {
pm_token_type_t type = lex_embdoc(parser);

if (type == PM_TOKEN_EOF) {
LEX(type);
}
Expand Down
2 changes: 1 addition & 1 deletion templates/src/diagnostic.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ static const pm_diagnostic_data_t diagnostic_messages[PM_DIAGNOSTIC_ID_MAX] = {
[PM_ERR_DEF_RECEIVER_TERM] = { "expected a `.` or `::` after the receiver in a method definition", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEF_TERM] = { "expected an `end` to close the `def` statement", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_DEFINED_EXPRESSION] = { "expected an expression after `defined?`", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EMBDOC_TERM] = { "could not find a terminator for the embedded document", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EMBDOC_TERM] = { "embedded document meets end of file", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EMBEXPR_END] = { "expected a `}` to close the embedded expression", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_EMBVAR_INVALID] = { "invalid embedded variable", PM_ERROR_LEVEL_SYNTAX },
[PM_ERR_END_UPCASE_BRACE] = { "expected a `{` after `END`", PM_ERROR_LEVEL_SYNTAX },
Expand Down
13 changes: 9 additions & 4 deletions test/prism/errors_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,14 @@ def test_pre_execution_context
end

def test_unterminated_embdoc
assert_errors expression("1"), "1\n=begin\n", [
["could not find a terminator for the embedded document", 2..9]
]
message = "embedded document meets end of file"
assert_error_messages "=begin", [message]
assert_error_messages "=begin\n", [message]

refute_error_messages "=begin\n=end"
refute_error_messages "=begin\n=end\0"
refute_error_messages "=begin\n=end\C-d"
refute_error_messages "=begin\n=end\C-z"
end

def test_unterminated_i_list
Expand Down Expand Up @@ -2217,7 +2222,7 @@ def assert_error_messages(source, errors)

def refute_error_messages(source)
assert_valid_syntax(source)
assert Prism.parse_success?(source)
assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully"
end

def assert_warning_messages(source, warnings)
Expand Down

0 comments on commit a0f2306

Please sign in to comment.