diff --git a/CHANGELOG.md b/CHANGELOG.md index 9532023..084db75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.3.2] - 2023-12-01 +### Fixed +* When creating a `CssSelector` or `XPathQuery` instance with invalid selector/query syntax, an `InvalidDomQueryException` is now immediately thrown. This change is considered to be not only non-breaking, but actually a fix, because the `CssSelector` would otherwise throw an exception later when the `apply()` method is called. The `XPathQuery` would silently return no result without notifying you of the invalid query and generate a PHP warning. + ## [1.3.1] - 2023-11-30 ### Fixed * Support usage with the new Symfony major version v7. diff --git a/src/Steps/Html/CssSelector.php b/src/Steps/Html/CssSelector.php index 857ba31..65b2db2 100644 --- a/src/Steps/Html/CssSelector.php +++ b/src/Steps/Html/CssSelector.php @@ -2,10 +2,28 @@ namespace Crwlr\Crawler\Steps\Html; +use Crwlr\Crawler\Steps\Html\Exceptions\InvalidDomQueryException; +use Symfony\Component\CssSelector\CssSelectorConverter; +use Symfony\Component\CssSelector\Exception\ExpressionErrorException; +use Symfony\Component\CssSelector\Exception\SyntaxErrorException; use Symfony\Component\DomCrawler\Crawler; final class CssSelector extends DomQuery { + /** + * @throws InvalidDomQueryException + */ + public function __construct(string $query) + { + try { + (new CssSelectorConverter())->toXPath($query); + } catch (ExpressionErrorException|SyntaxErrorException $exception) { + throw InvalidDomQueryException::fromSymfonyException($query, $exception); + } + + parent::__construct($query); + } + public function filter(Crawler $domCrawler): Crawler { return $domCrawler->filter($this->query); diff --git a/src/Steps/Html/Exceptions/InvalidDomQueryException.php b/src/Steps/Html/Exceptions/InvalidDomQueryException.php new file mode 100644 index 0000000..9c414b6 --- 
/dev/null +++ b/src/Steps/Html/Exceptions/InvalidDomQueryException.php @@ -0,0 +1,41 @@ +setDomQuery($domQuery); + + return $exception; + } + + public static function fromSymfonyException( + string $domQuery, + ExpressionErrorException|SyntaxErrorException $originalException, + ): self { + $exception = new self( + $originalException->getMessage(), + $originalException->getCode(), + $originalException, + ); + + $exception->setDomQuery($domQuery); + + return $exception; + } + + public function setDomQuery(string $domQuery): void + { + $this->query = $domQuery; + } +} diff --git a/src/Steps/Html/GetLink.php b/src/Steps/Html/GetLink.php index 9d212e7..a6c0b05 100644 --- a/src/Steps/Html/GetLink.php +++ b/src/Steps/Html/GetLink.php @@ -32,7 +32,12 @@ class GetLink extends Step protected bool $withFragment = true; - public function __construct(protected ?string $selector = null) {} + protected null|string|CssSelector $selector = null; + + public function __construct(null|string|CssSelector $selector = null) + { + $this->selector = is_string($selector) ? new CssSelector($selector) : $selector; + } public static function isSpecialNonHttpLink(Crawler $linkElement): bool { @@ -65,7 +70,11 @@ protected function invoke(mixed $input): Generator $selector = $this->selector ?? 'a'; - foreach ($input->filter($selector) as $link) { + if (is_string($selector)) { + $selector = new CssSelector($selector); + } + + foreach ($selector->filter($input) as $link) { $linkUrl = $this->getLinkUrl($link); if ($linkUrl) { diff --git a/src/Steps/Html/GetLinks.php b/src/Steps/Html/GetLinks.php index f218147..04ac2d9 100644 --- a/src/Steps/Html/GetLinks.php +++ b/src/Steps/Html/GetLinks.php @@ -19,7 +19,11 @@ protected function invoke(mixed $input): Generator $selector = $this->selector ?? 
'a'; - foreach ($input->filter($selector) as $link) { + if (is_string($selector)) { + $selector = new CssSelector($selector); + } + + foreach ($selector->filter($input) as $link) { $linkUrl = $this->getLinkUrl($link); if ($linkUrl) { diff --git a/src/Steps/Html/XPathQuery.php b/src/Steps/Html/XPathQuery.php index ba484b6..ddf9b0c 100644 --- a/src/Steps/Html/XPathQuery.php +++ b/src/Steps/Html/XPathQuery.php @@ -2,12 +2,48 @@ namespace Crwlr\Crawler\Steps\Html; +use Crwlr\Crawler\Steps\Html\Exceptions\InvalidDomQueryException; +use DOMDocument; +use DOMXPath; use Symfony\Component\DomCrawler\Crawler; class XPathQuery extends DomQuery { + /** + * @throws InvalidDomQueryException + */ + public function __construct(string $query) + { + $this->validateQuery($query); + + parent::__construct($query); + } + public function filter(Crawler $domCrawler): Crawler { return $domCrawler->filterXPath($this->query); } + + /** + * @throws InvalidDomQueryException + */ + private function validateQuery(string $query): void + { + // Temporarily set a new error handler, so checking an invalid XPath query does not generate a PHP warning. 
+        // Swallow only the expected "Invalid expression" warning while the query is checked.
+        set_error_handler(static function (int $errno, string $errstr): bool {
+            return $errno === E_WARNING && $errstr === 'DOMXPath::evaluate(): Invalid expression';
+        });
+
+        try {
+            $result = (new DOMXPath(new DOMDocument()))->evaluate($query);
+        } finally {
+            // Pop the temporary handler on every path so it never leaks into the caller's handler stack.
+            restore_error_handler();
+        }
+
+        if ($result === false) {
+            throw InvalidDomQueryException::make('Invalid XPath query', $query);
+        }
+    }
 }
diff --git a/tests/Steps/Html/CssSelectorTest.php b/tests/Steps/Html/CssSelectorTest.php index 6e96d25..01bcf60 100644 --- a/tests/Steps/Html/CssSelectorTest.php +++ b/tests/Steps/Html/CssSelectorTest.php @@ -3,10 +3,15 @@ namespace tests\Steps\Html; use Crwlr\Crawler\Steps\Html\CssSelector; +use Crwlr\Crawler\Steps\Html\Exceptions\InvalidDomQueryException; use Symfony\Component\DomCrawler\Crawler; use function tests\helper_getSimpleListHtml; +it('throws an exception when created with an invalid CSS Selector', function ($selector) { + new CssSelector($selector); +})->throws(InvalidDomQueryException::class)->with(['.foo;', '.foo:before']); + test('The apply method returns a string for a single match', function () { $html = '