Skip to content

Commit

Permalink
Improvements to the Regexp class API (BC break)
Browse files Browse the repository at this point in the history
  • Loading branch information
dg committed Jan 21, 2025
1 parent 21ddac5 commit 2c9fa6a
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 49 deletions.
8 changes: 4 additions & 4 deletions src/Texy/BlockParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ public function next(string $pattern, &$matches): bool
$matches = Regexp::match(
$this->text,
$pattern . 'Am', // anchored & multiline
Regexp::OFFSET_CAPTURE,
$this->offset,
captureOffset: true,
offset: $this->offset,
);

if ($matches) {
Expand Down Expand Up @@ -138,10 +138,10 @@ private function match(string $text): array
$priority = 0;
foreach ($this->patterns as $name => $pattern) {
/** @var array<int, array<int, array{string, int}>>|null $ms */
$ms = Regexp::match(
$ms = Regexp::matchAll(
$text,
$pattern['pattern'],
Regexp::OFFSET_CAPTURE | Regexp::ALL,
captureOffset: true,
);

foreach ((array) $ms as $m) {
Expand Down
2 changes: 1 addition & 1 deletion src/Texy/Helpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public static function outdent(string $s, bool $firstLine = false): string
$min = strspn($s, ' ');
} else {
$min = strlen($s);
foreach (Regexp::match($s, '#^ *\S#m', Regexp::ALL) as $m) {
foreach (Regexp::matchAll($s, '#^ *\S#m') as $m) {
$min = min($min, strlen($m[0]) - 1);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/Texy/LineParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ private function match(string $text, int $offset, array &$names, array &$offsets
} elseif ($matches[$name] = Regexp::match(
$text,
$this->patterns[$name]['pattern'],
Regexp::OFFSET_CAPTURE,
$offset + $delta,
captureOffset: true,
offset: $offset + $delta,
)) {
$m = &$matches[$name];
if (!strlen($m[0][0])) {
Expand All @@ -132,7 +132,7 @@ private function match(string $text, int $offset, array &$names, array &$offsets
// try next time?
if (
!$this->patterns[$name]['again']
|| !Regexp::match($text, $this->patterns[$name]['again'], 0, $offset + $delta)
|| !Regexp::match($text, $this->patterns[$name]['again'], offset: $offset + $delta)
) {
unset($names[$index]);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Texy/Modules/ParagraphModule.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public function process(Texy\BlockParser $parser, string $content, Texy\HtmlElem

// try to find modifier
$mod = null;
if ($mx = Regexp::match($s, '#' . Texy\Patterns::MODIFIER_H . '(?=\n|\z)#sUm', Regexp::OFFSET_CAPTURE)) {
if ($mx = Regexp::match($s, '#' . Texy\Patterns::MODIFIER_H . '(?=\n|\z)#sUm', captureOffset: true)) {
[$mMod] = $mx[1];
$s = trim(substr_replace($s, '', $mx[0][1], strlen($mx[0][0])));
if ($s === '') {
Expand Down
111 changes: 71 additions & 40 deletions src/Texy/Regexp.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,79 +9,110 @@

namespace Texy;

use JetBrains\PhpStorm\Language;


class Regexp
{
public const ALL = 1;
public const OFFSET_CAPTURE = 2;
/**
 * Splits the subject at every match of the pattern and returns the resulting pieces.
 * Text captured by parenthesized groups in the pattern is included in the result
 * (PREG_SPLIT_DELIM_CAPTURE is always set).
 *
 * @param  bool  $captureOffset  adds PREG_SPLIT_OFFSET_CAPTURE to the flags
 * @param  bool  $skipEmpty  adds PREG_SPLIT_NO_EMPTY to the flags
 * @param  int  $limit  forwarded to preg_split() as its limit argument
 * @throws RegexpException  when preg_last_error() reports an error after the call
 */
public static function split(
	string $subject,
	#[Language('RegExp')]
	string $pattern,
	bool $captureOffset = false,
	bool $skipEmpty = false,
	int $limit = -1,
): array
{
	// delimiter capture is unconditional; the remaining flags are opt-in
	$splitFlags = PREG_SPLIT_DELIM_CAPTURE;
	if ($captureOffset) {
		$splitFlags |= PREG_SPLIT_OFFSET_CAPTURE;
	}

	if ($skipEmpty) {
		$splitFlags |= PREG_SPLIT_NO_EMPTY;
	}

	return self::pcre('preg_split', [$pattern, $subject, $limit, $splitFlags]);
}


/**
* Splits string by a regular expression.
* @param int $flags OFFSET_CAPTURE
* Searches the string for the part matching the regular expression and returns
* an array with the found expression and individual subexpressions, or `null`.
*/
public static function split(string $subject, string $pattern, int $flags = 0): array
public static function match(
string $subject,
#[Language('RegExp')]
string $pattern,
bool $captureOffset = false,
int $offset = 0,
): ?array
{
$reFlags = (($flags & self::OFFSET_CAPTURE) ? PREG_SPLIT_OFFSET_CAPTURE : 0) | PREG_SPLIT_DELIM_CAPTURE;
$res = preg_split($pattern, $subject, -1, $reFlags);
if (preg_last_error()) { // run-time error
trigger_error(preg_last_error_msg(), E_USER_WARNING);
$flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0);
if ($offset > strlen($subject)) {
return null;
} elseif (!self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])) {
return null;
} else {
return $m;
}

return $res;
}


/**
* Performs a regular expression match.
* @param int $flags OFFSET_CAPTURE, ALL
* Searches the string for all occurrences matching the regular expression and
* returns an array of arrays containing the found expression and each subexpression.
* @return array[]
*/
public static function match(string $subject, string $pattern, int $flags = 0, int $offset = 0): mixed
public static function matchAll(
string $subject,
#[Language('RegExp')]
string $pattern,
bool $captureOffset = false,
int $offset = 0,
): array
{
$empty = $flags & self::ALL ? [] : null;
if ($offset > strlen($subject)) {
return $empty;
}

$reFlags = ($flags & self::OFFSET_CAPTURE) ? PREG_OFFSET_CAPTURE : 0;
$res = $flags & self::ALL
? preg_match_all($pattern, $subject, $m, $reFlags | PREG_SET_ORDER, $offset)
: preg_match($pattern, $subject, $m, $reFlags, $offset);
if (preg_last_error()) { // run-time error
trigger_error(preg_last_error_msg(), E_USER_WARNING);
} elseif ($res) {
return $m;
return [];
}

return $empty;
$flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | PREG_SET_ORDER;
self::pcre('preg_match_all', [$pattern, $subject, &$m, $flags, $offset]);
return $m;
}


/**
* Perform a regular expression search and replace.
* Replaces all occurrences matching regular expression $pattern which can be string or array in the form `pattern => replacement`.
*/
public static function replace(
string $subject,
#[Language('RegExp')]
string|array $pattern,
string|callable|null $replacement = null,
string|callable $replacement = '',
int $limit = -1,
bool $captureOffset = false,
): string
{
if (is_object($replacement) || is_array($replacement)) {
$res = preg_replace_callback($pattern, $replacement, $subject);
if ($res === null && preg_last_error()) { // run-time error
trigger_error(preg_last_error_msg(), E_USER_WARNING);
if (!is_callable($replacement, false, $textual)) {
throw new \InvalidStateException("Callback '$textual' is not callable.");
}

return $res;
$flags = ($captureOffset ? PREG_OFFSET_CAPTURE : 0);
return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit, 0, $flags]);

} elseif (is_array($pattern) && is_string(key($pattern))) {
return self::pcre('preg_replace', [array_keys($pattern), array_values($pattern), $subject, $limit]);

} elseif ($replacement === null && is_array($pattern)) {
$replacement = array_values($pattern);
$pattern = array_keys($pattern);
} else {
return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]);
}
}

$res = preg_replace($pattern, $replacement, $subject);
if (preg_last_error()) { // run-time error
trigger_error(preg_last_error_msg(), E_USER_WARNING);

/** @internal */
public static function pcre(string $func, array $args)
{
$res = @$func(...$args);
if (($code = preg_last_error()) // run-time error, but preg_last_error & return code are liars
&& ($res === null || !in_array($func, ['preg_replace_callback', 'preg_replace'], true))
) {
throw new RegexpException(preg_last_error_msg() . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
}

return $res;
Expand Down
18 changes: 18 additions & 0 deletions src/Texy/exceptions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?php

/**
* This file is part of the Texy! (https://texy.info)
* Copyright (c) 2004 David Grudl (https://davidgrudl.com)
*/

declare(strict_types=1);

namespace Texy;


/**
 * Regular expression pattern or execution failed.
 *
 * Thrown by Regexp::pcre() when preg_last_error() reports an error after a PCRE call.
 * The message is preg_last_error_msg() followed by the offending pattern(s),
 * and the exception code is the preg_last_error() value.
 */
class RegexpException extends \Exception
{
}

0 comments on commit 2c9fa6a

Please sign in to comment.