From d744b497007e1b3a993222dd378574ca386f9639 Mon Sep 17 00:00:00 2001 From: David Grudl Date: Thu, 16 Jan 2025 08:23:27 +0100 Subject: [PATCH] regexp: uses extended mode (BC break) --- src/Texy/Modifier.php | 2 +- src/Texy/Modules/BlockModule.php | 2 +- src/Texy/Modules/BlockQuoteModule.php | 2 +- src/Texy/Modules/FigureModule.php | 2 +- src/Texy/Modules/HeadingModule.php | 4 +-- src/Texy/Modules/HorizLineModule.php | 2 +- src/Texy/Modules/HtmlModule.php | 6 ++-- src/Texy/Modules/HtmlOutputModule.php | 2 +- src/Texy/Modules/ImageModule.php | 4 +-- src/Texy/Modules/LinkModule.php | 10 +++---- src/Texy/Modules/ListModule.php | 8 +++--- src/Texy/Modules/ParagraphModule.php | 2 +- src/Texy/Modules/PhraseModule.php | 40 +++++++++++++-------------- src/Texy/Modules/TableModule.php | 8 +++--- 14 files changed, 47 insertions(+), 47 deletions(-) diff --git a/src/Texy/Modifier.php b/src/Texy/Modifier.php index 64472e4f..0564ac91 100644 --- a/src/Texy/Modifier.php +++ b/src/Texy/Modifier.php @@ -62,7 +62,7 @@ public function setProperties(?string $s): void $ch = $s[$p]; if ($ch === '(') { // title - preg_match('#(?:\\\\\)|[^)\n])++\)#', $s, $m, 0, $p); + preg_match('~(?:\\\\\)|[^)\n])++\)~', $s, $m, 0, $p); $this->title = Helpers::unescapeHtml(str_replace('\)', ')', trim(substr($m[0], 1, -1)))); $p += strlen($m[0]); diff --git a/src/Texy/Modules/BlockModule.php b/src/Texy/Modules/BlockModule.php index d1bc3f3b..09d03a48 100644 --- a/src/Texy/Modules/BlockModule.php +++ b/src/Texy/Modules/BlockModule.php @@ -38,7 +38,7 @@ public function __construct(Texy\Texy $texy) $texy->registerBlockPattern( $this->pattern(...), - '~^/--++\ *+(.*)' . Texy\Patterns::MODIFIER_H . '?$((?:\n(?0)|\n.*+)*)(?:\n\\\--.*$|\z)~mUi', + '~^/--++\ *+(.*)' . Texy\Patterns::MODIFIER_H . '?$((?:\n(?0)|\n.*+)*)(?:\n\\\--.*$|\z)~mUix', 'blocks', ); } diff --git a/src/Texy/Modules/BlockQuoteModule.php b/src/Texy/Modules/BlockQuoteModule.php index dce85750..377d9f5c 100644 --- a/src/Texy/Modules/BlockQuoteModule.php +++ b/src/Texy/Modules/BlockQuoteModule.php @@ -23,7 +23,7 @@ public function __construct(Texy\Texy $texy) $texy->registerBlockPattern( $this->pattern(...), - '~^(?:' . Texy\Patterns::MODIFIER_H . '\n)?\>([\ \t]++|:)(\S.*+)$~mU', // original + '~^(?:' . Texy\Patterns::MODIFIER_H . '\n)?\>([\ \t]++|:)(\S.*+)$~mUx', // original 'blockquote', ); } diff --git a/src/Texy/Modules/FigureModule.php b/src/Texy/Modules/FigureModule.php index 9237beb5..67b89474 100644 --- a/src/Texy/Modules/FigureModule.php +++ b/src/Texy/Modules/FigureModule.php @@ -40,7 +40,7 @@ public function __construct(Texy\Texy $texy) $texy->registerBlockPattern( $this->pattern(...), '~^\[\*\ *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '?\ *+(\*|(?|<)\]' // [* urls .(title)[class]{style} >] - . '(?::(' . Patterns::LINK_URL . '|:))??\ ++\*\*\*\ ++(.{0,2000})' . Patterns::MODIFIER_H . '?()$~mU', + . '(?::(' . Patterns::LINK_URL . '|:))??\ ++\*\*\*\ ++(.{0,2000})' . Patterns::MODIFIER_H . '?()$~mUx', 'figure', ); } diff --git a/src/Texy/Modules/HeadingModule.php b/src/Texy/Modules/HeadingModule.php index b104f603..8de73afc 100644 --- a/src/Texy/Modules/HeadingModule.php +++ b/src/Texy/Modules/HeadingModule.php @@ -65,13 +65,13 @@ public function __construct(Texy\Texy $texy) $texy->registerBlockPattern( $this->patternUnderline(...), '~^(\S.{0,1000})' . Texy\Patterns::MODIFIER_H . '?\n' - . '(#{3,}+|\*{3,}+|={3,}+|-{3,}+)$~mU', + . '(\#{3,}+|\*{3,}+|={3,}+|-{3,}+)$~mUx', 'heading/underlined', ); $texy->registerBlockPattern( $this->patternSurround(...), - '~^(#{2,}+|={2,}+)(.+)' . Texy\Patterns::MODIFIER_H . '?()$~mU', + '~^(\#{2,}+|={2,}+)(.+)' . Texy\Patterns::MODIFIER_H . '?()$~mUx', 'heading/surrounded', ); } diff --git a/src/Texy/Modules/HorizLineModule.php b/src/Texy/Modules/HorizLineModule.php index d38ed104..cbe7e81f 100644 --- a/src/Texy/Modules/HorizLineModule.php +++ b/src/Texy/Modules/HorizLineModule.php @@ -32,7 +32,7 @@ public function __construct(Texy\Texy $texy) $texy->registerBlockPattern( $this->pattern(...), - '~^(\*{3,}+|-{3,}+)[\ \t]*' . Texy\Patterns::MODIFIER . '?()$~mU', + '~^(\*{3,}+|-{3,}+)[\ \t]*' . Texy\Patterns::MODIFIER . '?()$~mUx', 'horizline', ); } diff --git a/src/Texy/Modules/HtmlModule.php b/src/Texy/Modules/HtmlModule.php index 5835455b..80158d2d 100644 --- a/src/Texy/Modules/HtmlModule.php +++ b/src/Texy/Modules/HtmlModule.php @@ -32,13 +32,13 @@ public function __construct(Texy\Texy $texy) $texy->registerLinePattern( $this->patternTag(...), - '~<(/?)([a-z][a-z0-9_:-]{0,50})((?:\s++[a-z0-9\_:-]++|=\s*+"[^"' . Patterns::MARK . ']*+"|=\s*+\'[^\'' . Patterns::MARK . ']*+\'|=[^\s>' . Patterns::MARK . ']++)*)\s*+(/?)>~is', + '~<(/?)([a-z][a-z0-9_:-]{0,50})((?:\s++[a-z0-9\_:-]++|=\s*+"[^"' . Patterns::MARK . ']*+"|=\s*+\'[^\'' . Patterns::MARK . ']*+\'|=[^\s>' . Patterns::MARK . ']++)*)\s*+(/?)>~isx', 'html/tag', ); $texy->registerLinePattern( $this->patternComment(...), - '~~is', + '~~isx', 'html/comment', ); } @@ -288,7 +288,7 @@ private function parseAttributes(string $attrs): array { $matches = $res = []; preg_match_all( - '~([a-z0-9\_:-]+)\s*(?:=\s*(\'[^\']*\'|"[^"]*"|[^\'"\s]+))?()~isu', + '~([a-z0-9\_:-]+)\s*(?:=\s*(\'[^\']*\'|"[^"]*"|[^\'"\s]+))?()~isux', $attrs, $matches, PREG_SET_ORDER, diff --git a/src/Texy/Modules/HtmlOutputModule.php b/src/Texy/Modules/HtmlOutputModule.php index 09b6710b..9dd0451a 100644 --- a/src/Texy/Modules/HtmlOutputModule.php +++ b/src/Texy/Modules/HtmlOutputModule.php @@ -78,7 +78,7 @@ private function postProcess(Texy\Texy $texy, string &$s): void } // right trim - $s = Regexp::replace($s, "~[\t ]+(\n|\r|$)~", '$1'); // right trim + $s = Regexp::replace($s, '~[\t ]+(\n|\r|$)~', '$1'); // right trim // join double \r to single \n $s = str_replace("\r\r", "\n", $s); diff --git a/src/Texy/Modules/ImageModule.php b/src/Texy/Modules/ImageModule.php index 39f332c5..dfbbb4fd 100644 --- a/src/Texy/Modules/ImageModule.php +++ b/src/Texy/Modules/ImageModule.php @@ -54,7 +54,7 @@ public function __construct(Texy\Texy $texy) $texy->registerLinePattern( $this->patternImage(...), '~\[\*\ *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '?\ *+(\*|(?|<)\]' // [* urls .(title)[class]{style} >] - . '(?::(' . Patterns::LINK_URL . '|:))??()~U', + . '(?::(' . Patterns::LINK_URL . '|:))??()~Ux', 'image', ); } @@ -69,7 +69,7 @@ private function beforeParse(Texy\Texy $texy, &$text): void // [*image*]: urls .(title)[class]{style} $text = Texy\Regexp::replace( $text, - '~^\[\*([^\n]{1,100})\*\]:[\ \t]+(.{1,1000})[\ \t]*' . Patterns::MODIFIER . '?\s*()$~mU', + '~^\[\*([^\n]{1,100})\*\]:[\ \t]+(.{1,1000})[\ \t]*' . Patterns::MODIFIER . '?\s*()$~mUx', $this->patternReferenceDef(...), ); } diff --git a/src/Texy/Modules/LinkModule.php b/src/Texy/Modules/LinkModule.php index fb922425..ab1d3e17 100644 --- a/src/Texy/Modules/LinkModule.php +++ b/src/Texy/Modules/LinkModule.php @@ -56,14 +56,14 @@ public function __construct(Texy\Texy $texy) // [reference] $texy->registerLinePattern( $this->patternReference(...), - '~(\[[^\[\]\*\n' . Patterns::MARK . ']++\])~U', + '~(\[[^\[\]\*\n' . Patterns::MARK . ']++\])~Ux', 'link/reference', ); // direct url; charaters not allowed in URL <>[\]^`{|} $texy->registerLinePattern( $this->patternUrlEmail(...), - '~(?<=^|[\s([<:\x17])(?:https?://|www\.|ftp://)[0-9.' . Patterns::CHAR . '-][/\d' . Patterns::CHAR . '+\.\~%&?@=_:;#$!,*()\x{ad}-]{1,1000}[/\d' . Patterns::CHAR . '+\~?@=_#$*]~', + '~(?<=^|[\s([<:\x17])(?:https?://|www\.|ftp://)[0-9.' . Patterns::CHAR . '-][/\d' . Patterns::CHAR . '+\.\~%&?@=_:;#$!,*()\x{ad}-]{1,1000}[/\d' . Patterns::CHAR . '+\~?@=_#$*]~x', 'link/url', '~(?:https?://|www\.|ftp://)~', ); @@ -72,7 +72,7 @@ public function __construct(Texy\Texy $texy) self::$EMAIL = '[' . Patterns::CHAR . '][0-9.+_' . Patterns::CHAR . '-]{0,63}@[0-9.+_' . Patterns::CHAR . '\x{ad}-]{1,252}\.[' . Patterns::CHAR . '\x{ad}]{2,19}'; $texy->registerLinePattern( $this->patternUrlEmail(...), - '~(?<=^|[\s([<\x17])' . self::$EMAIL . '~', + '~(?<=^|[\s([<\x17])' . self::$EMAIL . '~x', 'link/email', '~' . self::$EMAIL . '~', ); @@ -90,7 +90,7 @@ private function beforeParse(Texy\Texy $texy, &$text): void if (!empty($texy->allowed['link/definition'])) { $text = Texy\Regexp::replace( $text, - '~^\[([^\[\]#\?\*\n]{1,100})\]:\ ++(\S{1,1000})([\ \t].{1,1000})?' . Patterns::MODIFIER . '?\s*()$~mU', + '~^\[([^\[\]#\?\*\n]{1,100})\]:\ ++(\S{1,1000})([\ \t].{1,1000})?' . Patterns::MODIFIER . '?\s*()$~mUx', $this->patternReferenceDef(...), ); } @@ -362,7 +362,7 @@ private function textualUrl(Link $link): string : $link->raw; // parse_url() in PHP damages UTF-8 - use regular expression - if (!preg_match('~^(?:(?P[a-z]+):)?(?://(?P[^/?#]+))?(?P(?:/|^)(?!/)[^?#]*)?(?:\?(?P[^#]*))?(?:#(?P.*))?()$~u', $raw, $parts)) { + if (!preg_match('~^(?:(?P[a-z]+):)?(?://(?P[^/?#]+))?(?P(?:/|^)(?!/)[^?#]*)?(?:\?(?P[^#]*))?(?:\#(?P.*))?()$~ux', $raw, $parts)) { return $link->raw; } diff --git a/src/Texy/Modules/ListModule.php b/src/Texy/Modules/ListModule.php index 8545124d..8189717a 100644 --- a/src/Texy/Modules/ListModule.php +++ b/src/Texy/Modules/ListModule.php @@ -58,7 +58,7 @@ private function beforeParse(): void $this->texy->registerBlockPattern( $this->patternList(...), '~^(?:' . Patterns::MODIFIER_H . '\n)?' // .{color: red} - . '(' . implode('|', $RE) . ')[\ \t]*+\S.*$~mU', // item (unmatched) + . '(' . implode('|', $RE) . ')[\ \t]*+\S.*$~mUx', // item (unmatched) 'list', ); @@ -66,7 +66,7 @@ private function beforeParse(): void $this->patternDefList(...), '~^(?:' . Patterns::MODIFIER_H . '\n)?' // .{color:red} . '(\S.{0,2000})\:[\ \t]*' . Patterns::MODIFIER_H . '?\n' // Term: - . '([\ \t]++)(' . implode('|', $REul) . ')[\ \t]*+\S.*$~mU', // - description + . '([\ \t]++)(' . implode('|', $REul) . ')[\ \t]*+\S.*$~mUx', // - description 'list/definition', ); } @@ -162,7 +162,7 @@ public function patternDefList(BlockParser $parser, array $matches): HtmlElement $mod->decorate($texy, $el); $parser->moveBackward(2); - $patternTerm = '~^\n?(\S.*)\:[\ \t]*' . Patterns::MODIFIER_H . '?()$~mUA'; + $patternTerm = '~^\n?(\S.*)\:[\ \t]*' . Patterns::MODIFIER_H . '?()$~mUAx'; while (true) { if ($elItem = $this->patternItem($parser, $bullet, true, 'dd')) { @@ -200,7 +200,7 @@ public function patternDefList(BlockParser $parser, array $matches): HtmlElement private function patternItem(BlockParser $parser, string $bullet, bool $indented, string $tag): ?HtmlElement { $spacesBase = $indented ? ('[\ \t]{1,}') : ''; - $patternItem = "~^\n?($spacesBase){$bullet}[ \\t]*(\\S.*)?" . Patterns::MODIFIER_H . '?()$~mAU'; + $patternItem = "~^\\n?($spacesBase){$bullet}[ \\t]*(\\S.*)?" . Patterns::MODIFIER_H . '?()$~mAUx'; // first line with bullet $matches = null; diff --git a/src/Texy/Modules/ParagraphModule.php b/src/Texy/Modules/ParagraphModule.php index 02f50967..257f8b94 100644 --- a/src/Texy/Modules/ParagraphModule.php +++ b/src/Texy/Modules/ParagraphModule.php @@ -39,7 +39,7 @@ public function process(Texy\BlockParser $parser, string $content, Texy\HtmlElem // try to find modifier $mod = null; - if ($mx = Regexp::match($s, '~' . Texy\Patterns::MODIFIER_H . '(?=\n|\z)~sUm', Regexp::OFFSET_CAPTURE)) { + if ($mx = Regexp::match($s, '~' . Texy\Patterns::MODIFIER_H . '(?=\n|\z)~sUmx', Regexp::OFFSET_CAPTURE)) { [$mMod] = $mx[1]; $s = trim(substr_replace($s, '', $mx[0][1], strlen($mx[0][0]))); if ($s === '') { diff --git a/src/Texy/Modules/PhraseModule.php b/src/Texy/Modules/PhraseModule.php index cac1bd7d..f8091f3f 100644 --- a/src/Texy/Modules/PhraseModule.php +++ b/src/Texy/Modules/PhraseModule.php @@ -53,7 +53,7 @@ public function __construct(Texy\Texy $texy) // UNIVERSAL $texy->registerLinePattern( array($this, 'patternPhrase'), - '~((?>([*+/^_"\~`-])+?))(?!\s)(.*(?!\2).)'.Texy\Patterns::MODIFIER.'?(?([*+/^_"\~`-])+?))(?!\s)(.*(?!\2).)'.Texy\Patterns::MODIFIER.'?(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?()"\'' . Patterns::MARK . '-])\*(?![\s*])((?:[^ *]++|[ *])+)' . Patterns::MODIFIER . '?(?()"?!\'-])(?::(' . Patterns::LINK_URL . '))??()~Us', + '~(?()"\'' . Patterns::MARK . '-])\*(?![\s*])((?:[^ *]++|[ *])+)' . Patterns::MODIFIER . '?(?()"?!\'-])(?::(' . Patterns::LINK_URL . '))??()~Usx', 'phrase/em-alt2', ); // ++inserted++ $texy->registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?-])((?:[^\r\n -]++|[ -])+)' . Patterns::MODIFIER . '?(?-])()~U', + '~(?-])((?:[^\r\n -]++|[ -])+)' . Patterns::MODIFIER . '?(?-])()~Ux', 'phrase/del', ); // ^^superscript^^ $texy->registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?>quote<< $texy->registerLinePattern( $this->patternPhrase(...), - '~(?)\>\>(?![\s>])((?:[^\r\n <]++|[ <])+)' . Patterns::MODIFIER . '?(?)\>\>(?![\s>])((?:[^\r\n <]++|[ <])+)' . Patterns::MODIFIER . '?(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternNoTexy(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~\`(\S(?:[^' . Patterns::MARK . '\r\n `]++|[ `])*)' . Patterns::MODIFIER . '?(?registerLinePattern( $this->patternPhrase(...), - '~([' . Patterns::CHAR . '0-9@#$%&.,_-]++)()(?=:\[)(?::(' . Patterns::LINK_URL . '))()~U', + '~([' . Patterns::CHAR . '0-9@#$%&.,_-]++)()(?=:\[)(?::(' . Patterns::LINK_URL . '))()~Ux', 'phrase/quicklink', ); // [text |link] $texy->registerLinePattern( $this->patternPhrase(...), - '~(?registerLinePattern( $this->patternPhrase(...), - '~(?registerBlockPattern( $this->patternTable(...), '~^(?:' . Patterns::MODIFIER_HV . '\n)?' // .{color: red} - . '\|.*()$~mU', // | .... + . '\|.*()$~mUx', // | .... 'table', ); } @@ -68,7 +68,7 @@ public function patternTable(Texy\BlockParser $parser, array $matches): HtmlElem $parser->moveBackward(); - if ($parser->next('~^\|(#|\=){2,}(?![|#=+])(.+)\1*\|?\ *' . Patterns::MODIFIER_H . '?()$~Um', $matches)) { + if ($parser->next('~^\|(\#|\=){2,}(?![|#=+])(.+)\1*\|?\ *' . Patterns::MODIFIER_H . '?()$~Umx', $matches)) { [, , $mContent, $mMod] = $matches; // [1] => # / = // [2] => .... @@ -94,7 +94,7 @@ public function patternTable(Texy\BlockParser $parser, array $matches): HtmlElem continue; } - if ($parser->next('~^\|(.*)(?:|\|[\ \t]*' . Patterns::MODIFIER_HV . '?)()$~U', $matches)) { + if ($parser->next('~^\|(.*)(?:|\|[\ \t]*' . Patterns::MODIFIER_HV . '?)()$~Ux', $matches)) { // smarter head detection if ($rowCounter === 0 && !$isHead && $parser->next('~^\|[=-][+|=-]{2,}$~Um', $foo)) { $isHead = true; @@ -228,7 +228,7 @@ private function processCell( Texy\Texy $texy, ): ?TableCellElement { - $matches = Regexp::match($cell, '~(\*??)[\ \t]*' . Patterns::MODIFIER_HV . '??(.*)' . Patterns::MODIFIER_HV . '?[\ \t]*()$~AU'); + $matches = Regexp::match($cell, '~(\*??)[\ \t]*' . Patterns::MODIFIER_HV . '??(.*)' . Patterns::MODIFIER_HV . '?[\ \t]*()$~AUx'); if (!$matches) { return null; }