Skip to content

Commit

Permalink
regexp: escaped space
Browse files Browse the repository at this point in the history
  • Loading branch information
dg committed Jan 21, 2025
1 parent 084aee8 commit 59c1fd2
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 27 deletions.
4 changes: 2 additions & 2 deletions src/Texy/Modules/BlockModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public function __construct(Texy\Texy $texy)

$texy->registerBlockPattern(
$this->pattern(...),
'#^/--++ *+(.*)' . Texy\Patterns::MODIFIER_H . '?$((?:\n(?0)|\n.*+)*)(?:\n\\\--.*$|\z)#mUi',
'#^/--++\ *+(.*)' . Texy\Patterns::MODIFIER_H . '?$((?:\n(?0)|\n.*+)*)(?:\n\\\--.*$|\z)#mUi',
'blocks',
);
}
Expand All @@ -52,7 +52,7 @@ private function beforeBlockParse(Texy\BlockParser $parser, string &$text): void
// autoclose exclusive blocks
$text = Texy\Regexp::replace(
$text,
'#^(/--++ *+(?!div|texysource).*)$((?:\n.*+)*?)(?:\n\\\--.*$|(?=(\n/--.*$)))#mi',
'#^(/--++\ *+(?!div|texysource).*)$((?:\n.*+)*?)(?:\n\\\--.*$|(?=(\n/--.*$)))#mi',
"\$1\$2\n\\--",
);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Texy/Modules/FigureModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public function __construct(Texy\Texy $texy)

$texy->registerBlockPattern(
$this->pattern(...),
'#^\[\* *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '? *+(\*|(?<!<)>|<)\]' // [* urls .(title)[class]{style} >]
. '(?::(' . Patterns::LINK_URL . '|:))?? ++\*\*\* ++(.{0,2000})' . Patterns::MODIFIER_H . '?()$#mUu',
'#^\[\*\ *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '?\ *+(\*|(?<!<)>|<)\]' // [* urls .(title)[class]{style} >]
. '(?::(' . Patterns::LINK_URL . '|:))??\ ++\*\*\*\ ++(.{0,2000})' . Patterns::MODIFIER_H . '?()$#mUu',
'figure',
);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Texy/Modules/ImageModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public function __construct(Texy\Texy $texy)
// [*image*]:LINK
$texy->registerLinePattern(
$this->patternImage(...),
'#\[\* *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '? *+(\*|(?<!<)>|<)\]' // [* urls .(title)[class]{style} >]
'#\[\*\ *+([^\n' . Patterns::MARK . ']{1,1000})' . Patterns::MODIFIER . '?\ *+(\*|(?<!<)>|<)\]' // [* urls .(title)[class]{style} >]
. '(?::(' . Patterns::LINK_URL . '|:))??()#Uu',
'image',
);
Expand Down
2 changes: 1 addition & 1 deletion src/Texy/Modules/LinkModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ private function beforeParse(Texy\Texy $texy, &$text): void
if (!empty($texy->allowed['link/definition'])) {
$text = Texy\Regexp::replace(
$text,
'#^\[([^\[\]\#\?\*\n]{1,100})\]: ++(\S{1,1000})([\ \t].{1,1000})?' . Patterns::MODIFIER . '?\s*()$#mUu',
'#^\[([^\[\]\#\?\*\n]{1,100})\]:\ ++(\S{1,1000})([\ \t].{1,1000})?' . Patterns::MODIFIER . '?\s*()$#mUu',
$this->patternReferenceDef(...),
);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Texy/Modules/ParagraphModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public function __construct(Texy\Texy $texy)
public function process(Texy\BlockParser $parser, string $content, Texy\HtmlElement $el): void
{
$parts = $parser->isIndented()
? preg_split('#(\n(?! )|\n{2,})#', $content, -1, PREG_SPLIT_NO_EMPTY)
? preg_split('#(\n(?!\ )|\n{2,})#', $content, -1, PREG_SPLIT_NO_EMPTY)
: preg_split('#(\n{2,})#', $content, -1, PREG_SPLIT_NO_EMPTY);

foreach ($parts as $s) {
Expand Down Expand Up @@ -73,7 +73,7 @@ private function solve(
$content = $texy->mergeLines
// ....
// ... => \r means line break
? Regexp::replace($content, '#\n +(?=\S)#', "\r")
? Regexp::replace($content, '#\n\ +(?=\S)#', "\r")
: Regexp::replace($content, '#\n#', "\r");

$el = new Texy\HtmlElement('p');
Expand Down
2 changes: 1 addition & 1 deletion src/Texy/Modules/TableModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public function patternTable(Texy\BlockParser $parser, array $matches): HtmlElem

$parser->moveBackward();

if ($parser->next('#^\|(\#|\=){2,}(?![|\#=+])(.+)\1*\|? *' . Patterns::MODIFIER_H . '?()$#Um', $matches)) {
if ($parser->next('#^\|(\#|\=){2,}(?![|\#=+])(.+)\1*\|?\ *' . Patterns::MODIFIER_H . '?()$#Um', $matches)) {
[, , $mContent, $mMod] = $matches;
// [1] => # / =
// [2] => ....
Expand Down
28 changes: 14 additions & 14 deletions src/Texy/Modules/TypographyModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,23 @@ final class TypographyModule extends Texy\Module
'#(?<=[\d ]|^)-(?=[\d ]|$)#' /*. */ => "\u{2013}", // en dash 123-123
'#(?<=[^!*+,/:;<=>@\\\_|-])--(?=[^!*+,/:;<=>@\\\_|-])#' => "\u{2013}", // en dash alphanum--alphanum
'#,-#' /*. */ => ",\u{2013}", // en dash ,-
'#(?<!\d)(\d{1,2}\.) (\d{1,2}\.) (\d\d)#' => "\$1\u{A0}\$2\u{A0}\$3", // date 23. 1. 1978
'#(?<!\d)(\d{1,2}\.) (\d{1,2}\.)#' /*. */ => "\$1\u{A0}\$2", // date 23. 1.
'# --- #' /*. */ => "\u{A0}\u{2014} ", // em dash ---
'# ([\x{2013}\x{2014}])#u' /*. */ => "\u{A0}\$1", // &nbsp; behind dash (dash stays at line end)
'# <-{1,2}> #' /*. */ => " \u{2194} ", // left right arrow <-->
'#(?<!\d)(\d{1,2}\.)\ (\d{1,2}\.)\ (\d\d)#' => "\$1\u{A0}\$2\u{A0}\$3", // date 23. 1. 1978
'#(?<!\d)(\d{1,2}\.)\ (\d{1,2}\.)#' /*. */ => "\$1\u{A0}\$2", // date 23. 1.
'#\ ---\ #' /*. */ => "\u{A0}\u{2014} ", // em dash ---
'#\ ([\x{2013}\x{2014}])#u' /*. */ => "\u{A0}\$1", // &nbsp; behind dash (dash stays at line end)
'#\ <-{1,2}> #' /*. */ => " \u{2194} ", // left right arrow <-->
'#-{1,}> #' /*. */ => "\u{2192} ", // right arrow -->
'# <-{1,}#' /*. */ => " \u{2190} ", // left arrow <--
'#={1,}> #' /*. */ => "\u{21D2} ", // right arrow ==>
'#\ <-{1,}#' /*. */ => " \u{2190} ", // left arrow <--
'#={1,}>\ #' /*. */ => "\u{21D2} ", // right arrow ==>
'#\+-#' /*. */ => "\u{B1}", // +-
'#(\d++) x (?=\d)#' /*. */ => "\$1\u{A0}\u{D7}\u{A0}", // dimension sign 123 x 123...
'#(\d++)\ x\ (?=\d)#' /*. */ => "\$1\u{A0}\u{D7}\u{A0}", // dimension sign 123 x 123...
'#(\d++)x(?=\d)#' /*. */ => "\$1\u{D7}", // dimension sign 123x123...
'#(?<=\d)x(?= |,|\.|$)#m' /*. */ => "\u{D7}", // dimension sign 123x
'#(\S ?)\(TM\)#i' /*. */ => "\$1\u{2122}", // trademark (TM)
'#(\S ?)\(R\)#i' /*. */ => "\$1\u{AE}", // registered (R)
'#\(C\)( ?\S)#i' /*. */ => "\u{A9}\$1", // copyright (C)
'#(?<=\d)x(?=\ |,|\.|$)#m' /*. */ => "\u{D7}", // dimension sign 123x
'#(\S\ ?)\(TM\)#i' /*. */ => "\$1\u{2122}", // trademark (TM)
'#(\S\ ?)\(R\)#i' /*. */ => "\$1\u{AE}", // registered (R)
'#\(C\)(\ ?\S)#i' /*. */ => "\u{A9}\$1", // copyright (C)
'#\(EUR\)#' /*. */ => "\u{20AC}", // Euro (EUR)
'#(\d) (?=\d{3})#' /*. */ => "\$1\u{A0}", // (phone) number 1 123 123 123...
'#(\d)\ (?=\d{3})#' /*. */ => "\$1\u{A0}", // (phone) number 1 123 123 123...

// CONTENT_MARKUP mark: \x17-\x1F, CONTENT_REPLACED mark: \x16, CONTENT_TEXTUAL mark: \x17
'#(?<=[^\s\x17])\s++([\x17-\x1F]++)(?=\s)#u' => '$1', // remove intermarkup space phase 1
Expand All @@ -77,7 +77,7 @@ final class TypographyModule extends Texy\Module
'#(?<=.{50})\s++(?=[\x17-\x1F]*\S{1,6}[\x17-\x1F]*$)#us' => "\u{A0}", // space before last short word

// nbsp space between number (optionally followed by dot) and word, symbol, punctuation, currency symbol
'#(?<=^| |\.|,|-|\+|\x16|\(|\d\x{A0})([\x17-\x1F]*\d++\.?[\x17-\x1F]*)\s++(?=[\x17-\x1F]*[%' . Patterns::CHAR . '\x{b0}-\x{be}\x{2020}-\x{214f}])#mu' => "\$1\u{A0}",
'#(?<=^|\ |\.|,|-|\+|\x16|\(|\d\x{A0})([\x17-\x1F]*\d++\.?[\x17-\x1F]*)\s++(?=[\x17-\x1F]*[%' . Patterns::CHAR . '\x{b0}-\x{be}\x{2020}-\x{214f}])#mu' => "\$1\u{A0}",
// space between preposition and word
'#(?<=^|[^0-9' . Patterns::CHAR . '])([\x17-\x1F]*[ksvzouiKSVZOUIA][\x17-\x1F]*)\s++(?=[\x17-\x1F]*[0-9' . Patterns::CHAR . '])#mus' => "\$1\u{A0}",

Expand Down
8 changes: 4 additions & 4 deletions src/Texy/Patterns.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ class Patterns
public const MARK = '\x14-\x1F';

// modifier .(title)[class]{style}
public const MODIFIER = '(?: *+(?<= |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}){1,3}?))';
public const MODIFIER = '(?:\ *+(?<=\ |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}){1,3}?))';

// modifier .(title)[class]{style}<>
public const MODIFIER_H = '(?: *+(?<= |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}|<>|>|=|<){1,4}?))';
public const MODIFIER_H = '(?:\ *+(?<=\ |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}|<>|>|=|<){1,4}?))';

// modifier .(title)[class]{style}<>^
public const MODIFIER_HV = '(?: *+(?<= |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}|<>|>|=|<|\^|\-|\_){1,5}?))';
public const MODIFIER_HV = '(?:\ *+(?<=\ |^)\.((?:\((?:\\\\\)|[^)\n])++\)|\[[^\]\n]++\]|\{[^}\n]++\}|<>|>|=|<|\^|\-|\_){1,5}?))';

// images [* urls .(title)[class]{style} >] '\[\* *+([^\n'.MARK.']{1,1000})'.MODIFIER.'? *+(\*|(?<!<)>|<)\]'
public const IMAGE = '\[\* *+([^\n\x14-\x1F]{1,1000})(?: *+(?<= |^)\.((?:\([^)\n]++\)|\[[^\]\n]++\]|\{[^}\n]++\}){1,3}?))? *+(\*|(?<!<)>|<)\]';
public const IMAGE = '\[\*\ *+([^\n\x14-\x1F]{1,1000})(?:\ *+(?<=\ |^)\.((?:\([^)\n]++\)|\[[^\]\n]++\]|\{[^}\n]++\}){1,3}?))?\ *+(\*|(?<!<)>|<)\]';

// links, url - doesn't end with :).,!?
public const LINK_URL = '(?:\[[^\]\n]++\]|(?=[\w/+.~%&?@=_\#$])[^\s\x14-\x1F]{0,1000}?[^:);,.!?\s\x14-\x1F])'; // any url - doesn't end by :).,!?
Expand Down

0 comments on commit 59c1fd2

Please sign in to comment.