Skip to content

Commit 2f5859c

Browse files
authored
Merge pull request #200 from Brille24/string_tokenization_part2
Adding a test for combined tokens
2 parents d487942 + f5b28b6 commit 2f5859c

File tree

2 files changed

+82
-10
lines changed

2 files changed

+82
-10
lines changed

src/PHPCR/Util/QOM/Sql2Scanner.php

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,9 @@ protected function scan($sql2)
149149
$stringStartCharacter = false;
150150
$isEscaped = false;
151151
$escapedQuotesCount = 0;
152-
foreach (\str_split($sql2) as $index => $character) {
152+
$splitString = \str_split($sql2);
153+
for ($index = 0; $index < count($splitString); $index++) {
154+
$character = $splitString[$index];
153155
if (!$stringStartCharacter && in_array($character, [' ', "\t", "\n"], true)) {
154156
if ($currentToken !== '') {
155157
$tokens[] = $currentToken;
@@ -165,12 +167,27 @@ protected function scan($sql2)
165167
$currentToken = '';
166168
continue;
167169
}
170+
171+
// Handling the square brackets in queries
172+
if (!$isEscaped && $character === '[') {
173+
if ($currentToken !== '') {
174+
$tokens[] = $currentToken;
175+
}
176+
$stringSize = $this->parseBrackets($sql2, $index);
177+
$bracketContent = substr($sql2, $index + 1, $stringSize - 2);
178+
$tokens[] = '['.trim($bracketContent, '"').']';
179+
180+
// We need to subtract one here because the for loop will automatically increment the index
181+
$index += $stringSize - 1;
182+
continue;
183+
}
184+
168185
$currentToken .= $character;
169186

170187
if (!$isEscaped && in_array($character, ['"', "'"], true)) {
171188
// Checking if the previous or next value is a ' to handle the weird SQL strings
172189
// This will not check if the amount of quotes is even
173-
$nextCharacter = $this->getCharacterAtIndex($sql2, $index + 1);
190+
$nextCharacter = $splitString[$index + 1] ?? '';
174191
if ($character === "'" && $nextCharacter === "'") {
175192
$isEscaped = true;
176193
$escapedQuotesCount++;
@@ -188,6 +205,12 @@ protected function scan($sql2)
188205
} elseif (!$stringStartCharacter) {
189206
// If there is no start character already we have found the beginning of a new string
190207
$stringStartCharacter = $character;
208+
209+
// When tokenizing `AS"abc"` add the current token (AS) as token already
210+
if (strlen($currentToken) > 1) {
211+
$tokens[] = substr($currentToken, 0, strlen($currentToken) - 1);
212+
$currentToken = $character;
213+
}
191214
}
192215
}
193216
$isEscaped = $character === '\\';
@@ -203,12 +226,10 @@ protected function scan($sql2)
203226
return $tokens;
204227
}
205228

206-
private function getCharacterAtIndex($string, $index)
229+
private function parseBrackets(string $query, int $index): int
207230
{
208-
if ($index < strlen($string)) {
209-
return $string[$index];
210-
}
231+
$endPosition = strpos($query, ']', $index) + 1;
211232

212-
return '';
233+
return $endPosition - $index;
213234
}
214235
}

tests/PHPCR/Tests/Util/QOM/Sql2ScannerTest.php

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ public function testToken()
2828
/**
2929
* @dataProvider dataTestStringTokenization
3030
*/
31-
public function testStringTokenization()
31+
public function testStringTokenization(string $query)
3232
{
33-
$scanner = new Sql2Scanner('SELECT page.* FROM [nt:unstructured] AS page WHERE name ="Hello world"');
33+
$scanner = new Sql2Scanner($query);
3434
$expected = [
3535
'SELECT',
3636
'page',
@@ -49,7 +49,7 @@ public function testStringTokenization()
4949
$this->expectTokensFromScanner($scanner, $expected);
5050
}
5151

52-
public function dataTestStringTokenization()
52+
public function dataTestStringTokenization(): array
5353
{
5454
$multilineQuery = <<<'SQL'
5555
SELECT page.*
@@ -124,6 +124,57 @@ public function testSQLEscapedStrings2()
124124
$this->expectTokensFromScanner($scanner, $expected);
125125
}
126126

127+
public function testSquareBrackets()
128+
{
129+
$sql = 'WHERE ISSAMENODE(file, ["/home node"])';
130+
131+
$scanner = new Sql2Scanner($sql);
132+
$expected = [
133+
'WHERE',
134+
'ISSAMENODE',
135+
'(',
136+
'file',
137+
',',
138+
'[/home node]',
139+
')',
140+
];
141+
142+
$this->expectTokensFromScanner($scanner, $expected);
143+
}
144+
145+
public function testSquareBracketsWithoutQuotes()
146+
{
147+
$sql = 'WHERE ISSAMENODE(file, [/home node])';
148+
149+
$scanner = new Sql2Scanner($sql);
150+
$expected = [
151+
'WHERE',
152+
'ISSAMENODE',
153+
'(',
154+
'file',
155+
',',
156+
'[/home node]',
157+
')',
158+
];
159+
160+
$this->expectTokensFromScanner($scanner, $expected);
161+
}
162+
163+
public function testTokenizingWithMissingSpaces()
164+
{
165+
$sql = 'SELECT * AS"all"';
166+
167+
$scanner = new Sql2Scanner($sql);
168+
$expected = [
169+
'SELECT',
170+
'*',
171+
'AS',
172+
'"all"',
173+
];
174+
175+
$this->expectTokensFromScanner($scanner, $expected);
176+
}
177+
127178
public function testThrowingErrorOnUnclosedString()
128179
{
129180
$this->expectException(InvalidQueryException::class);

0 commit comments

Comments
 (0)