Skip to content

Commit 244b670

Browse files
Merge pull request #136 from VincentLanglet/fixTwigMB
🐛 Fix issue with multi byte
2 parents 29d524e + ae2f92a commit 244b670

File tree

3 files changed

+35
-11
lines changed

3 files changed

+35
-11
lines changed

TwigCS/src/Token/Tokenizer.php

+14-11
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010

1111
/**
1212
* An override of Twig's Lexer to add whitespace and new line detection.
13+
*
14+
* Since the regexes use byte offsets as positions, the mb_ functions are deliberately not used.
15+
* phpcs:disable SymfonyCustom.PHP.EncourageMultiBytes
1316
*/
1417
class Tokenizer
1518
{
@@ -193,7 +196,7 @@ protected function resetState(Source $source): void
193196
$this->bracketsAndTernary = [];
194197

195198
$this->code = str_replace(["\r\n", "\r"], "\n", $source->getCode());
196-
$this->end = mb_strlen($this->code);
199+
$this->end = strlen($this->code);
197200
$this->filename = $source->getName();
198201
}
199202

@@ -310,8 +313,8 @@ protected function moveCurrentPosition(int $value = 1): void
310313
*/
311314
protected function moveCursor(string $value): void
312315
{
313-
$this->cursor += mb_strlen($value);
314-
$this->line += mb_substr_count($value, "\n");
316+
$this->cursor += strlen($value);
317+
$this->line += substr_count($value, "\n");
315318
}
316319

317320
/**
@@ -322,7 +325,7 @@ protected function moveCursor(string $value): void
322325
*/
323326
protected function pushToken(int $type, string $value = null): void
324327
{
325-
$tokenPositionInLine = $this->cursor - mb_strrpos(mb_substr($this->code, 0, $this->cursor), PHP_EOL);
328+
$tokenPositionInLine = $this->cursor - strrpos(substr($this->code, 0, $this->cursor), PHP_EOL);
326329
$this->tokens[] = new Token($type, $this->line, $tokenPositionInLine, $this->filename, $value);
327330
}
328331

@@ -350,7 +353,7 @@ protected function lexExpression(): void
350353
$this->lexName($match[0]);
351354
} elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
352355
$this->lexNumber($match[0]);
353-
} elseif (false !== mb_strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
356+
} elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
354357
$this->lexPunctuation();
355358
} elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
356359
$this->lexString($match[0]);
@@ -436,7 +439,7 @@ protected function lexDqString(): void
436439
$this->lexStartInterpolation();
437440
} elseif (
438441
preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor)
439-
&& mb_strlen($match[0]) > 0
442+
&& strlen($match[0]) > 0
440443
) {
441444
$this->pushToken(Token::STRING_TYPE, $match[0]);
442445
$this->moveCursor($match[0]);
@@ -500,8 +503,8 @@ protected function lexData(int $limit = 0): void
500503
$value = $match[0];
501504

502505
// Stop if cursor reaches the next token start.
503-
if (0 !== $limit && $limit <= ($this->cursor + mb_strlen($value))) {
504-
$value = mb_substr($value, 0, $limit - $this->cursor);
506+
if (0 !== $limit && $limit <= ($this->cursor + strlen($value))) {
507+
$value = substr($value, 0, $limit - $this->cursor);
505508
}
506509

507510
// Fixing token start among expressions and comments.
@@ -702,7 +705,7 @@ protected function lexPunctuation(): void
702705

703706
return;
704707
}
705-
if (false !== mb_strpos(',)]}', $currentToken)) {
708+
if (false !== strpos(',)]}', $currentToken)) {
706709
// Because {{ foo ? 'yes' }} is the same as {{ foo ? 'yes' : '' }}
707710
do {
708711
array_pop($this->bracketsAndTernary);
@@ -716,9 +719,9 @@ protected function lexPunctuation(): void
716719
}
717720
}
718721

719-
if (false !== mb_strpos('([{', $currentToken)) {
722+
if (false !== strpos('([{', $currentToken)) {
720723
$this->bracketsAndTernary[] = [$currentToken, $this->line];
721-
} elseif (false !== mb_strpos(')]}', $currentToken)) {
724+
} elseif (false !== strpos(')]}', $currentToken)) {
722725
if (0 === count($this->bracketsAndTernary)) {
723726
throw new Exception(sprintf('Unexpected "%s"', $currentToken));
724727
}

TwigCS/tests/Token/Tokenizer/TokenizerTest.php

+20
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,26 @@ public function tokenizeDataProvider(): array
276276
44 => Token::EOF_TYPE,
277277
],
278278
],
279+
[
280+
__DIR__.'/TokenizerTest6.twig',
281+
[
282+
0 => Token::BLOCK_START_TYPE,
283+
1 => Token::WHITESPACE_TYPE,
284+
2 => Token::BLOCK_TAG_TYPE,
285+
3 => Token::WHITESPACE_TYPE,
286+
4 => Token::NAME_TYPE,
287+
5 => Token::WHITESPACE_TYPE,
288+
6 => Token::BLOCK_END_TYPE,
289+
7 => Token::TEXT_TYPE,
290+
8 => Token::BLOCK_START_TYPE,
291+
9 => Token::WHITESPACE_TYPE,
292+
10 => Token::BLOCK_TAG_TYPE,
293+
11 => Token::WHITESPACE_TYPE,
294+
12 => Token::BLOCK_END_TYPE,
295+
13 => Token::EOL_TYPE,
296+
14 => Token::EOF_TYPE,
297+
],
298+
],
279299
];
280300
}
281301
}
TwigCS/tests/Token/Tokenizer/TokenizerTest6.twig

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{% block try %}⇨{% endblock %}

0 commit comments

Comments
 (0)