From dd653eee1e248e8b3354a0ad13d801ac9f5d86be Mon Sep 17 00:00:00 2001 From: otsch Date: Tue, 19 Dec 2023 13:16:18 +0100 Subject: [PATCH] Fix paginating with multiple initial inputs Reset paginator state after finishing paginating for one base input, to enable paginating multiple listings of the same structure. Also fix a warning when running tests, because of a final new line in a demo cache file and add memory limit option when running phpstan. --- .editorconfig | 3 +++ CHANGELOG.md | 4 ++++ composer.json | 2 +- src/Steps/Loading/Http/AbstractPaginator.php | 19 ++++++++++++++++ src/Steps/Loading/Http/Paginate.php | 4 ++++ tests/Cache/_cachefilecontent | 2 +- tests/_Integration/Http/PaginationTest.php | 12 ++++++++++ .../Http/QueryParamPaginationTest.php | 22 +++++++++++++++++++ .../_Integration/_Server/PaginatedListing.php | 9 ++++++-- 9 files changed, 73 insertions(+), 4 deletions(-) diff --git a/.editorconfig b/.editorconfig index 75349f8..01a1248 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,3 +15,6 @@ trim_trailing_whitespace = false [*.yml] indent_size = 2 + +[_cachefilecontent] +insert_final_newline = false diff --git a/CHANGELOG.md b/CHANGELOG.md index f22c826..7f672a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [1.3.4] - 2023-12-19 +### Fixed +* Reset paginator state after finishing paginating for one base input, to enable paginating multiple listings of the same structure. + ## [1.3.3] - 2023-12-01 ### Fixed * Add forgotten getter method to get the DOM query that is attached to an `InvalidDomQueryException` instance. 
diff --git a/composer.json b/composer.json index 25a07b0..85bc90e 100644 --- a/composer.json +++ b/composer.json @@ -69,7 +69,7 @@ "scripts": { "test": "pest --exclude-group integration", "test-integration": "pest --group integration", - "stan": "phpstan analyse", + "stan": "@php -d memory_limit=4G vendor/bin/phpstan analyse", "cs": "php-cs-fixer fix -v --dry-run", "cs-fix": "php-cs-fixer fix -v", "add-git-hooks": "@php bin/add-git-hooks" diff --git a/src/Steps/Loading/Http/AbstractPaginator.php b/src/Steps/Loading/Http/AbstractPaginator.php index f5e6cd1..58e8c1e 100644 --- a/src/Steps/Loading/Http/AbstractPaginator.php +++ b/src/Steps/Loading/Http/AbstractPaginator.php @@ -44,6 +44,25 @@ public function hasFinished(): bool return $this->hasFinished || $this->maxPagesReached(); } + /** + * When a paginate step is called with multiple inputs, like: + * + * ['https://www.example.com/listing1', 'https://www.example.com/listing2', ...] + * + * it always has to start paginating again for each listing base URL. + * Therefore, we reset the state after finishing paginating one base input. + * The exception is $this->found: if the exact same pages are discovered again whilst + * paginating, we don't want to load those pages again and again. 
+ */ + public function resetFinished(): void + { + $this->hasFinished = false; + + $this->loadedCount = 0; + + $this->latestRequest = null; + } + public function stopWhen(Closure|StopRule $callback): self { $this->stopRules[] = $callback; diff --git a/src/Steps/Loading/Http/Paginate.php b/src/Steps/Loading/Http/Paginate.php index b3a9295..970ca53 100644 --- a/src/Steps/Loading/Http/Paginate.php +++ b/src/Steps/Loading/Http/Paginate.php @@ -70,6 +70,10 @@ protected function invoke(mixed $input): Generator if ($this->logger) { $this->paginator->logWhenFinished($this->logger); + + if (method_exists($this->paginator, 'resetFinished')) { + $this->paginator->resetFinished(); + } } } diff --git a/tests/Cache/_cachefilecontent b/tests/Cache/_cachefilecontent index 63f5180..46ffe9e 100644 --- a/tests/Cache/_cachefilecontent +++ b/tests/Cache/_cachefilecontent @@ -217,4 +217,4 @@ $query = Query::fromString('foo=1&bar=2&baz=3&quz=4') -";} +";} \ No newline at end of file diff --git a/tests/_Integration/Http/PaginationTest.php b/tests/_Integration/Http/PaginationTest.php index a3349fd..1e9ff25 100644 --- a/tests/_Integration/Http/PaginationTest.php +++ b/tests/_Integration/Http/PaginationTest.php @@ -51,3 +51,15 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger expect($this->getActualOutputForAssertion())->toContain('Max pages limit reached'); }); + +it('resets the finished paginating state after each processed (/paginated) input', function () { + $crawler = new PaginationCrawler(); + + $crawler + ->inputs(['http://localhost:8000/paginated-listing', 'http://localhost:8000/paginated-listing?foo=bar']) + ->addStep(Http::get()->paginate('#pagination', 2)->outputKey('response')); + + $results = helper_generatorToArray($crawler->run()); + + expect($results)->toHaveCount(4); +}); diff --git a/tests/_Integration/Http/QueryParamPaginationTest.php b/tests/_Integration/Http/QueryParamPaginationTest.php index cfcc146..f9c1e9f 100644 --- 
a/tests/_Integration/Http/QueryParamPaginationTest.php +++ b/tests/_Integration/Http/QueryParamPaginationTest.php @@ -89,3 +89,25 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger expect($results)->toHaveCount(2); }); + +it('resets the finished paginating state after each processed (/paginated) input', function () { + $crawler = new QueryParamPaginationCrawler(); + + $crawler + ->inputs([ + 'http://localhost:8000/query-param-pagination?page=1', + 'http://localhost:8000/query-param-pagination?page=1&foo=bar', + ]) + ->addStep( + Http::get() + ->paginate( + QueryParamsPaginator::paramsInUrl(2) + ->increase('page') + ->stopWhen(PaginatorStopRules::isEmptyInJson('data.items')) + )->addToResult(['body']) + ); + + $results = helper_generatorToArray($crawler->run()); + + expect($results)->toHaveCount(4); +}); diff --git a/tests/_Integration/_Server/PaginatedListing.php b/tests/_Integration/_Server/PaginatedListing.php index 22263c0..e060fc8 100644 --- a/tests/_Integration/_Server/PaginatedListing.php +++ b/tests/_Integration/_Server/PaginatedListing.php @@ -7,8 +7,13 @@
@@ -27,11 +32,11 @@