diff --git a/CHANGELOG.md b/CHANGELOG.md index e74bf492..494c6243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.1.0] - 2024-10-19 +### Added +* The new `postBrowserNavigateHook()` method in the `Http` step classes, which allows you to define callback functions that are triggered after the headless browser has navigated to the specified URL. They are called with the chrome-php `Page` object as an argument, so you can interact with the page. Also, there is a new class `BrowserAction` providing some simple actions (like wait for element, click element, ...) as Closures via static methods. You can use it like `Http::get()->postBrowserNavigateHook(BrowserAction::clickElement('#element'))`. + ## [2.0.1] - 2024-10-15 ### Fixed * Issue with the `afterLoad` hook of the `HttpLoader`, introduced in v2. Calling the hook was commented out, which slipped through because the test case was faulty. diff --git a/src/Loader/Http/HeadlessBrowserLoaderHelper.php b/src/Loader/Http/HeadlessBrowserLoaderHelper.php index 0a172d4c..04db8abf 100644 --- a/src/Loader/Http/HeadlessBrowserLoaderHelper.php +++ b/src/Loader/Http/HeadlessBrowserLoaderHelper.php @@ -2,6 +2,7 @@ namespace Crwlr\Crawler\Loader\Http; +use Closure; use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; use Crwlr\Crawler\Loader\Http\Politeness\Throttler; use Exception; @@ -42,8 +43,28 @@ class HeadlessBrowserLoaderHelper protected int $timeout = 30_000; + /** + * @var Closure[] + */ + protected array $tempPostNavigateHooks = []; + public function __construct(private ?BrowserFactory $browserFactory = null) {} + /** + * Set temporary post navigate hooks + * + * They will be executed after the next call to navigateToPageAndGetRespondedRequest() + * and forgotten afterward. 
+ * + * @param Closure[] $hooks + */ + public function setTempPostNavigateHooks(array $hooks): static + { + $this->tempPostNavigateHooks = $hooks; + + return $this; + } + /** * @throws OperationTimedOut * @throws CommunicationException @@ -81,6 +102,8 @@ function ($params) use (&$statusCode, &$responseHeaders) { $this->navigate($request->getUri()->__toString()); + $this->callPostNavigateHooks(); + $throttler->trackRequestEndFor($request->getUri()); $html = $this->page?->getHtml(); @@ -196,6 +219,24 @@ protected function navigate(string $url): void } } + /** + * Run the temporary post navigate hooks with the current page, then forget them. + * + * Each hook is invoked directly rather than via Closure::call(), so static closures work + * and other closures keep their own bound $this. The list is reset in a finally block, + * so the hooks are forgotten even when one of them throws. + */ + protected function callPostNavigateHooks(): void + { + try { + foreach ($this->tempPostNavigateHooks as $hook) { + $hook($this->page); + } + } finally { + $this->tempPostNavigateHooks = []; + } + } + /** * @throws Exception */ diff --git a/src/Steps/Loading/Http/Browser/BrowserAction.php b/src/Steps/Loading/Http/Browser/BrowserAction.php new file mode 100644 index 00000000..79d6ae00 --- /dev/null +++ b/src/Steps/Loading/Http/Browser/BrowserAction.php @@ -0,0 +1,54 @@ +waitUntilContainsElement($cssSelector); + }; + } + + public static function clickElement(string $cssSelector): Closure + { + return function (Page $page) use ($cssSelector) { + $page->mouse()->find($cssSelector)->click(); + }; + } + + public static function clickElementAndWaitForReload(string $cssSelector): Closure + { + return function (Page $page) use ($cssSelector) { + $page->mouse()->find($cssSelector)->click(); + + $page->waitForReload(); + }; + } + + public static function evaluate(string $jsCode): Closure + { + return function (Page $page) use ($jsCode) { + $page->evaluate($jsCode); + }; + } + + public static function evaluateAndWaitForReload(string $jsCode): Closure + { + return function (Page $page) use ($jsCode) { + $page->evaluate($jsCode)->waitForPageReload(); + }; + } + + public static function wait(float $seconds): Closure + { + return function (Page $page) use ($seconds) { +
usleep(Microseconds::fromSeconds($seconds)->value); + }; + } +} diff --git a/src/Steps/Loading/HttpBase.php b/src/Steps/Loading/HttpBase.php index 763c4316..cd9355a2 100644 --- a/src/Steps/Loading/HttpBase.php +++ b/src/Steps/Loading/HttpBase.php @@ -2,6 +2,7 @@ namespace Crwlr\Crawler\Steps\Loading; +use Closure; use Crwlr\Crawler\Loader\Http\Exceptions\LoadingException; use Crwlr\Crawler\Loader\Http\HttpLoader; use Crwlr\Crawler\Loader\Http\Messages\RespondedRequest; @@ -42,6 +43,11 @@ abstract class HttpBase extends Step */ protected ?array $inputHeaders = null; + /** + * @var Closure[] + */ + protected array $postBrowserNavigateHooks = []; + /** * @param string $method * @param array $headers @@ -127,6 +133,13 @@ public function useInputKeyAsHeaders(string $key): static return $this; } + public function postBrowserNavigateHook(Closure $callback): static + { + $this->postBrowserNavigateHooks[] = $callback; + + return $this; + } + /** * @return UriInterface|UriInterface[] * @throws InvalidArgumentException @@ -185,10 +198,16 @@ protected function getRequestFromInputUri(UriInterface $uri): RequestInterface */ protected function getResponseFromRequest(RequestInterface $request): ?RespondedRequest { + $loader = $this->getLoader(); + + if (!empty($this->postBrowserNavigateHooks) && $loader->usesHeadlessBrowser()) { + $loader->browser()->setTempPostNavigateHooks($this->postBrowserNavigateHooks); + } + if ($this->stopOnErrorResponse) { - $response = $this->getLoader()->loadOrFail($request); + $response = $loader->loadOrFail($request); } else { - $response = $this->getLoader()->load($request); + $response = $loader->load($request); } if ($response !== null && ($response->response->getStatusCode() < 400 || $this->yieldErrorResponses)) { diff --git a/tests/Loader/Http/HeadlessBrowserLoaderHelperTest.php b/tests/Loader/Http/HeadlessBrowserLoaderHelperTest.php index d626ed14..a192dba9 100644 --- a/tests/Loader/Http/HeadlessBrowserLoaderHelperTest.php +++ 
b/tests/Loader/Http/HeadlessBrowserLoaderHelperTest.php @@ -133,3 +133,43 @@ function helper_setUpHeadlessChromeMocks( expect($invadedBrowserFactory->chromeBinary)->toBe($chromeExecutable); }); + +it('calls the temporary post navigate hooks once', function () { + $browserFactoryMock = helper_setUpHeadlessChromeMocks(); + + $helper = new HeadlessBrowserLoaderHelper($browserFactoryMock); + + $hook1Called = $hook2Called = $hook3Called = false; + + $helper->setTempPostNavigateHooks([ + function (Page $page) use (& $hook1Called) { + $hook1Called = true; + }, + function (Page $page) use (& $hook2Called) { + $hook2Called = true; + }, + function (Page $page) use (& $hook3Called) { + $hook3Called = true; + }, + ]); + + $helper->navigateToPageAndGetRespondedRequest( + new Request('GET', 'https://www.example.com/foo'), + helper_getMinThrottler(), + ); + + expect($hook1Called)->toBeTrue() + ->and($hook2Called)->toBeTrue() + ->and($hook3Called)->toBeTrue(); + + $hook1Called = $hook2Called = $hook3Called = false; + + $helper->navigateToPageAndGetRespondedRequest( + new Request('GET', 'https://www.example.com/foo'), + helper_getMinThrottler(), + ); + + expect($hook1Called)->toBeFalse() + ->and($hook2Called)->toBeFalse() + ->and($hook3Called)->toBeFalse(); +}); diff --git a/tests/_Integration/Http/HeadlessBrowserTest.php b/tests/_Integration/Http/HeadlessBrowserTest.php index a3b17692..4f8d7796 100644 --- a/tests/_Integration/Http/HeadlessBrowserTest.php +++ b/tests/_Integration/Http/HeadlessBrowserTest.php @@ -6,6 +6,7 @@ use Crwlr\Crawler\Loader\LoaderInterface; use Crwlr\Crawler\Steps\Html; use Crwlr\Crawler\Steps\Loading\Http; +use Crwlr\Crawler\Steps\Loading\Http\Browser\BrowserAction; use Crwlr\Crawler\Steps\Step; use Crwlr\Crawler\UserAgents\UserAgent; use Crwlr\Crawler\UserAgents\UserAgentInterface; @@ -131,3 +132,115 @@ protected function invoke(mixed $input): Generator ->and($results[0]->get('printed-cookie'))->toBeString() 
->and($results[0]->get('printed-cookie'))->toBe('javascriptcookie'); }); + +it('gets a cookie that is set via a click, executed via post browser navigate hook', function () { + $crawler = new HeadlessBrowserCrawler(); + + $crawler + ->input('http://localhost:8000/set-delayed-js-cookie') + ->addStep( + Http::get() + ->postBrowserNavigateHook(BrowserAction::clickElement('#setCookieButton')), + ) + ->addStep(new class extends Step { + protected function invoke(mixed $input): Generator + { + yield 'http://localhost:8000/print-cookie'; + } + }) + ->addStep(Http::get()) + ->addStep((new GetStringFromResponseHtmlBody())->keepAs('printed-cookie')); + + $results = helper_generatorToArray($crawler->run()); + + expect($results)->toHaveCount(1) + ->and($results[0]->get('printed-cookie'))->toBeString() + ->and($results[0]->get('printed-cookie'))->toBe('jscookie'); +}); + +test('BrowserActions waitUntilDocumentContainsElement(), clickElement() and evaluate() work as expected', function () { + $crawler = new HeadlessBrowserCrawler(); + + $crawler + ->input('http://localhost:8000/browser-actions') + ->addStep( + Http::get() + ->postBrowserNavigateHook( + BrowserAction::waitUntilDocumentContainsElement('#delayed_el_container #delayed_el'), + ) + ->postBrowserNavigateHook(BrowserAction::clickElement('#click_element')) + ->postBrowserNavigateHook( + BrowserAction::evaluate( + 'document.getElementById(\'evaluation_container\').innerHTML = \'evaluated\'', + ), + ) + ->keep('body'), + ); + + $results = helper_generatorToArray($crawler->run()); + + $body = $results[0]->get('body'); + + expect($body)->toContain('
a
') + ->and($body)->toContain('
yes
') + ->and($body)->toContain('
evaluated
'); +}); + +test('BrowserAction::clickElementAndWaitForReload() works as expected', function () { + $crawler = new HeadlessBrowserCrawler(); + + $crawler + ->input('http://localhost:8000/browser-actions/click-and-wait-for-reload') + ->addStep( + Http::get() + ->postBrowserNavigateHook(BrowserAction::clickElementAndWaitForReload('#click')) + ->keep('body'), + ); + + $results = helper_generatorToArray($crawler->run()); + + $body = $results[0]->get('body'); + + expect($body)->toContain('
yes
'); +}); + +test('BrowserAction::evaluateAndWaitForReload() works as expected', function () { + $crawler = new HeadlessBrowserCrawler(); + + $crawler + ->input('http://localhost:8000/browser-actions/evaluate-and-wait-for-reload') + ->addStep( + Http::get() + ->postBrowserNavigateHook( + BrowserAction::evaluateAndWaitForReload( + 'window.location.href = \'http://localhost:8000/browser-actions/' . + 'evaluate-and-wait-for-reload-reloaded\'', + ), + ) + ->keep('body'), + ); + + $results = helper_generatorToArray($crawler->run()); + + $body = $results[0]->get('body'); + + expect($body)->toContain('
yay
'); +}); + +test('BrowserAction::wait() works as expected', function () { + $crawler = new HeadlessBrowserCrawler(); + + $crawler + ->input('http://localhost:8000/browser-actions/wait') + ->addStep( + Http::get() + ->postBrowserNavigateHook(BrowserAction::wait(0.3)) + ->keep('body'), + ); + + $results = helper_generatorToArray($crawler->run()); + + $body = $results[0]->get('body'); + + expect($body)->toContain('
hooray
'); +}); diff --git a/tests/_Integration/Server.php b/tests/_Integration/Server.php index 9105e443..494080de 100644 --- a/tests/_Integration/Server.php +++ b/tests/_Integration/Server.php @@ -57,6 +57,32 @@ function getParamAfter(string $route, string $after): string return include(__DIR__ . '/_Server/SetCookieJs.php'); } +if ($route === '/set-delayed-js-cookie') { + return include(__DIR__ . '/_Server/SetDelayedCookieJs.php'); +} + +if (str_starts_with($route, '/browser-actions')) { + if ($route === '/browser-actions') { + return include(__DIR__ . '/_Server/BrowserActions/Main.php'); + } + + if (str_starts_with($route, '/browser-actions/click-and-wait-for-reload')) { + return include(__DIR__ . '/_Server/BrowserActions/ClickAndWaitForReload.php'); + } + + if ($route === '/browser-actions/evaluate-and-wait-for-reload') { + return include(__DIR__ . '/_Server/BrowserActions/EvaluateAndWaitForReload.php'); + } + + if ($route === '/browser-actions/evaluate-and-wait-for-reload-reloaded') { + return include(__DIR__ . '/_Server/BrowserActions/EvaluateAndWaitForReloadReloaded.php'); + } + + if ($route === '/browser-actions/wait') { + return include(__DIR__ . '/_Server/BrowserActions/Wait.php'); + } +} + if ($route === '/print-cookie') { return include(__DIR__ . '/_Server/PrintCookie.php'); } diff --git a/tests/_Integration/_Server/BrowserActions/ClickAndWaitForReload.php b/tests/_Integration/_Server/BrowserActions/ClickAndWaitForReload.php new file mode 100644 index 00000000..5070f9b0 --- /dev/null +++ b/tests/_Integration/_Server/BrowserActions/ClickAndWaitForReload.php @@ -0,0 +1,16 @@ + + + + + Hello World + + +
+ Click here + + +
yes
+ +
+ + diff --git a/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReload.php b/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReload.php new file mode 100644 index 00000000..c29b008f --- /dev/null +++ b/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReload.php @@ -0,0 +1,5 @@ + + +Hello World + + diff --git a/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReloadReloaded.php b/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReloadReloaded.php new file mode 100644 index 00000000..30b9ff22 --- /dev/null +++ b/tests/_Integration/_Server/BrowserActions/EvaluateAndWaitForReloadReloaded.php @@ -0,0 +1,7 @@ + + +Hello World + +
yay
+ + diff --git a/tests/_Integration/_Server/BrowserActions/Main.php b/tests/_Integration/_Server/BrowserActions/Main.php new file mode 100644 index 00000000..bc1e70a4 --- /dev/null +++ b/tests/_Integration/_Server/BrowserActions/Main.php @@ -0,0 +1,23 @@ + + + + + Hello World + + +
+
+ +
+
Click me
+ +
+ + +
+ + diff --git a/tests/_Integration/_Server/BrowserActions/Wait.php b/tests/_Integration/_Server/BrowserActions/Wait.php new file mode 100644 index 00000000..16ac9cf7 --- /dev/null +++ b/tests/_Integration/_Server/BrowserActions/Wait.php @@ -0,0 +1,18 @@ + + + + + Hello World + + +
+
+ + +
+ + diff --git a/tests/_Integration/_Server/SetCookieJs.php b/tests/_Integration/_Server/SetCookieJs.php index 650a9586..8b5d6110 100644 --- a/tests/_Integration/_Server/SetCookieJs.php +++ b/tests/_Integration/_Server/SetCookieJs.php @@ -1,3 +1,4 @@ + yo diff --git a/tests/_Integration/_Server/SetDelayedCookieJs.php b/tests/_Integration/_Server/SetDelayedCookieJs.php new file mode 100644 index 00000000..8a33973b --- /dev/null +++ b/tests/_Integration/_Server/SetDelayedCookieJs.php @@ -0,0 +1,10 @@ + + + +Hey + +
+ +
+ +