Skip to content

Commit 25a12c9

Browse files
authored
fix parse error caused by certain unicode characters (#270)
1 parent abea646 commit 25a12c9

File tree

2 files changed

+23
-13
lines changed

2 files changed

+23
-13
lines changed

classes/local/lexer.php

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -133,19 +133,19 @@ private function read_next_token(): ?token {
133133
return $this->read_operator();
134134
}
135135
// There are some single-character tokens...
136-
if (preg_match('/[]\[(){},;π\\\]/', $currentchar)) {
137-
$types = [
138-
'[' => token::OPENING_BRACKET,
139-
'(' => token::OPENING_PAREN,
140-
'{' => token::OPENING_BRACE,
141-
']' => token::CLOSING_BRACKET,
142-
')' => token::CLOSING_PAREN,
143-
'}' => token::CLOSING_BRACE,
144-
',' => token::ARG_SEPARATOR,
145-
'\\' => token::PREFIX,
146-
';' => token::END_OF_STATEMENT,
147-
'π' => token::CONSTANT,
148-
];
136+
$types = [
137+
'[' => token::OPENING_BRACKET,
138+
'(' => token::OPENING_PAREN,
139+
'{' => token::OPENING_BRACE,
140+
']' => token::CLOSING_BRACKET,
141+
')' => token::CLOSING_PAREN,
142+
'}' => token::CLOSING_BRACE,
143+
',' => token::ARG_SEPARATOR,
144+
'\\' => token::PREFIX,
145+
';' => token::END_OF_STATEMENT,
146+
'π' => token::CONSTANT,
147+
];
148+
if (in_array($currentchar, array_keys($types))) {
149149
return $this->read_single_char_token($types[$currentchar]);
150150
}
151151
// If we are still here, that's not good at all. We need to read the char (it is only peeked

tests/lexer_test.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,16 @@ public function test_read_invalid_string($expected, $input): void {
829829
self::assertNotNull($e);
830830
}
831831

832+
/**
 * Test that constructing a lexer from input containing an unexpected unicode
 * character (here: the typographic apostrophe in "I’m") throws an exception
 * carrying the exact "Unexpected input" message for that character.
 */
public function test_unexpected_unicode_char_in_input(): void {
    $caught = null;

    try {
        new lexer('I’m');
    } catch (Exception $ex) {
        // Keep the exception so it can be inspected once the try/catch is done.
        $caught = $ex;
    }

    // Without an exception, the lexer has wrongly accepted the input.
    self::assertNotNull($caught);
    self::assertEquals("1:2:Unexpected input: '’'", $caught->getMessage());
}
841+
832842
/**
833843
* Test whether the read() function of the tokenizer class correctly parses special
834844
* cases involving numbers.

0 commit comments

Comments
 (0)