From 3c5b5447aba4c1ea5a51f4480ec570e90cb9d656 Mon Sep 17 00:00:00 2001 From: otsch Date: Mon, 26 Aug 2024 12:24:45 +0200 Subject: [PATCH] Allow prolonging ttl for cached responses Add new methods `FileCache::prolong()` and `FileCache::prolongAll()` to allow prolonging the time to live for cached responses. --- CHANGELOG.md | 3 + src/Cache/CacheItem.php | 8 ++ src/Cache/FileCache.php | 48 ++++++-- src/Logger/CliLogger.php | 4 + tests/Cache/FileCacheTest.php | 226 ++++++++++++++++++++++++---------- tests/Pest.php | 10 +- tests/_Stubs/DummyLogger.php | 4 + 7 files changed, 229 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f63d0c3b..9e0a37a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * __BREAKING__: The `HttpLoader::retryCachedErrorResponses()` method now returns an instance of the new `Crwlr\Crawler\Loader\Http\Cache\RetryManager` class. This class provides the methods `only()` and `except()` to restrict retries to specific HTTP response status codes. Previously, this method returned the `HttpLoader` itself (`$this`), so if you're using it in a chain and calling other loader methods after it, you will need to refactor your code. * __BREAKING__: Removed the `Microseconds` class from this package. It has been moved to the `crwlr/utils` package, which you can use instead. +### Added +* New methods `FileCache::prolong()` and `FileCache::prolongAll()` to allow prolonging the time to live for cached responses. + ## [1.10.0] - 2024-08-05 ### Added * URL refiners: `UrlRefiner::withScheme()`, `UrlRefiner::withHost()`, `UrlRefiner::withPort()`, `UrlRefiner::withoutPort()`, `UrlRefiner::withPath()`, `UrlRefiner::withQuery()`, `UrlRefiner::withoutQuery()`, `UrlRefiner::withFragment()` and `UrlRefiner::withoutFragment()`. diff --git a/src/Cache/CacheItem.php b/src/Cache/CacheItem.php index 1d814901..b69c1ddf 100644 --- a/src/Cache/CacheItem.php +++ b/src/Cache/CacheItem.php @@ -47,6 +47,14 @@ public function isExpired(): bool return time() > $this->createdAt->add($ttl)->getTimestamp(); } + /** + * Get a new instance with same data but a different time to live. + */ + public function withTtl(DateInterval|int $ttl): CacheItem + { + return new CacheItem($this->value, $this->key, $ttl, $this->createdAt); + } + /** * @return mixed[] */ diff --git a/src/Cache/FileCache.php b/src/Cache/FileCache.php index dc7e8b9b..527df2cd 100644 --- a/src/Cache/FileCache.php +++ b/src/Cache/FileCache.php @@ -82,13 +82,7 @@ public function set(string $key, mixed $value, DateInterval|int|null $ttl = null $value = new CacheItem($value->value(), $key, $ttl ?? $value->ttl); } - $content = serialize($value); - - if ($this->useCompression) { - $content = $this->encode($content); - } - - return file_put_contents($this->basePath . '/' . $key, $content) !== false; + return $this->saveCacheItem($value); } public function delete(string $key): bool @@ -96,6 +90,17 @@ public function delete(string $key): bool return unlink($this->basePath . '/' . $key); } + public function prolong(string $key, DateInterval|int $ttl): bool + { + try { + $item = $this->getCacheItem($key); + + return $this->saveCacheItem($item->withTtl($ttl)); + } catch (Throwable) { + return false; + } + } + /** * @throws InvalidArgumentException */ @@ -114,6 +119,21 @@ public function clear(): bool return true; } + public function prolongAll(DateInterval|int $ttl): bool + { + $allFiles = scandir($this->basePath); + + if (is_array($allFiles)) { + foreach ($allFiles as $file) { + if ($file !== '.' && $file !== '..' && $file !== '.gitkeep' && !$this->prolong($file, $ttl)) { + return false; + } + } + } + + return true; + } + /** * @return iterable * @throws MissingZlibExtensionException|ReadingCacheFailedException|InvalidArgumentException @@ -176,6 +196,20 @@ protected function getCacheItem(string $key): CacheItem return $unserialized; } + /** + * @throws MissingZlibExtensionException + */ + protected function saveCacheItem(CacheItem $item): bool + { + $content = serialize($item); + + if ($this->useCompression) { + $content = $this->encode($content); + } + + return file_put_contents($this->basePath . '/' . $item->key(), $content) !== false; + } + protected function unserialize(string $content): mixed { // Temporarily set a new error handler, so unserializing a compressed string does not result in a PHP warning. diff --git a/src/Logger/CliLogger.php b/src/Logger/CliLogger.php index b6cb8bdf..3784de22 100644 --- a/src/Logger/CliLogger.php +++ b/src/Logger/CliLogger.php @@ -50,6 +50,10 @@ public function debug(string|Stringable $message, array $context = []): void $this->log('debug', $message, $context); } + /** + * @param string $level + * @param mixed[] $context + */ public function log($level, string|Stringable $message, array $context = []): void { if (!is_string($level)) { diff --git a/tests/Cache/FileCacheTest.php b/tests/Cache/FileCacheTest.php index 6f45cb3f..995ebdd7 100644 --- a/tests/Cache/FileCacheTest.php +++ b/tests/Cache/FileCacheTest.php @@ -33,6 +33,19 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq return new RespondedRequest(new Request('GET', $requestUrl), new Response()); } +/** + * Helper function to get the CacheItem instance, because FileCache::get() returns only + * the value wrapped in the CacheItem object. + */ +function helper_getCacheItemByKey(string $key): ?CacheItem +{ + $cacheFileContent = file_get_contents(helper_cachedir() . '/' . $key); + + $cacheItem = unserialize($cacheFileContent !== false ? $cacheFileContent : 'a:0:{}'); + + return $cacheItem instanceof CacheItem ? $cacheItem : null; +} + afterEach(function () { helper_resetCacheDir(); }); @@ -52,11 +65,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache = new FileCache(helper_cachedir()); - expect($cache->set($respondedRequest->cacheKey(), $respondedRequest))->toBeTrue(); - - expect(file_exists(helper_cachedir() . '/' . $respondedRequest->cacheKey()))->toBeTrue(); - - expect($cache->get($respondedRequest->cacheKey()))->toBeInstanceOf(RespondedRequest::class); + expect($cache->set($respondedRequest->cacheKey(), $respondedRequest))->toBeTrue() + ->and(file_exists(helper_cachedir() . '/' . $respondedRequest->cacheKey()))->toBeTrue() + ->and($cache->get($respondedRequest->cacheKey()))->toBeInstanceOf(RespondedRequest::class); }); it('checks if it has an item for a certain key', function () { @@ -66,9 +77,8 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->set($respondedRequest->cacheKey(), $respondedRequest); - expect($cache->has($respondedRequest->cacheKey()))->toBeTrue(); - - expect($cache->has('otherKey'))->toBeFalse(); + expect($cache->has($respondedRequest->cacheKey()))->toBeTrue() + ->and($cache->has('otherKey'))->toBeFalse(); }); it('does not return expired items', function () { @@ -85,9 +95,8 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->set($cacheItem->key(), $cacheItem); - expect($cache->has($cacheItem->key()))->toBeFalse(); - - expect($cache->get($cacheItem->key()))->toBeNull(); + expect($cache->has($cacheItem->key()))->toBeFalse() + ->and($cache->get($cacheItem->key()))->toBeNull(); }); it('deletes a cache item', function () { @@ -111,11 +120,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->set('foo', $cacheItem); - expect(file_exists(helper_cachedir() . '/foo'))->toBeTrue(); - - expect($cache->has('foo'))->toBeFalse(); - - expect(file_exists(helper_cachedir() . '/foo'))->toBeFalse(); + expect(file_exists(helper_cachedir() . '/foo'))->toBeTrue() + ->and($cache->has('foo'))->toBeFalse() + ->and(file_exists(helper_cachedir() . '/foo'))->toBeFalse(); }); it('deletes an expired cache item when get() is called with its key', function () { @@ -125,11 +132,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->set('foo', $cacheItem); - expect(file_exists(helper_cachedir() . '/foo'))->toBeTrue(); - - expect($cache->get('foo', 'defaultValue'))->toBe('defaultValue'); - - expect(file_exists(helper_cachedir() . '/foo'))->toBeFalse(); + expect(file_exists(helper_cachedir() . '/foo'))->toBeTrue() + ->and($cache->get('foo', 'defaultValue'))->toBe('defaultValue') + ->and(file_exists(helper_cachedir() . '/foo'))->toBeFalse(); }); it('clears the whole cache', function () { @@ -143,19 +148,15 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq helper_addMultipleItemsToCache([$cacheItem1, $cacheItem2, $cacheItem3], $cache); - expect($cache->has($cacheItem1->cacheKey()))->toBeTrue(); - - expect($cache->has($cacheItem2->cacheKey()))->toBeTrue(); - - expect($cache->has($cacheItem3->cacheKey()))->toBeTrue(); + expect($cache->has($cacheItem1->cacheKey()))->toBeTrue() + ->and($cache->has($cacheItem2->cacheKey()))->toBeTrue() + ->and($cache->has($cacheItem3->cacheKey()))->toBeTrue(); $cache->clear(); - expect($cache->has($cacheItem1->cacheKey()))->toBeFalse(); - - expect($cache->has($cacheItem2->cacheKey()))->toBeFalse(); - - expect($cache->has($cacheItem3->cacheKey()))->toBeFalse(); + expect($cache->has($cacheItem1->cacheKey()))->toBeFalse() + ->and($cache->has($cacheItem2->cacheKey()))->toBeFalse() + ->and($cache->has($cacheItem3->cacheKey()))->toBeFalse(); }); it('gets multiple items', function () { @@ -171,11 +172,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $items = $cache->getMultiple([$cacheItem1->cacheKey(), $cacheItem2->cacheKey(), $cacheItem3->cacheKey()]); - expect(reset($items)->request->getUri()->__toString())->toBe('/foo'); - - expect(next($items)->request->getUri()->__toString())->toBe('/bar'); - - expect(next($items)->request->getUri()->__toString())->toBe('/baz'); + expect(reset($items)->request->getUri()->__toString())->toBe('/foo') + ->and(next($items)->request->getUri()->__toString())->toBe('/bar') + ->and(next($items)->request->getUri()->__toString())->toBe('/baz'); }); it('sets multiple items', function () { @@ -193,11 +192,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cacheItem3->cacheKey() => $cacheItem3, ]); - expect($cache->has($cacheItem1->cacheKey()))->toBeTrue(); - - expect($cache->has($cacheItem2->cacheKey()))->toBeTrue(); - - expect($cache->has($cacheItem3->cacheKey()))->toBeTrue(); + expect($cache->has($cacheItem1->cacheKey()))->toBeTrue() + ->and($cache->has($cacheItem2->cacheKey()))->toBeTrue() + ->and($cache->has($cacheItem3->cacheKey()))->toBeTrue(); }); it('deletes multiple items', function () { @@ -213,11 +210,9 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->deleteMultiple([$cacheItem1->cacheKey(), $cacheItem2->cacheKey(), $cacheItem3->cacheKey()]); - expect($cache->has($cacheItem1->cacheKey()))->toBeFalse(); - - expect($cache->has($cacheItem2->cacheKey()))->toBeFalse(); - - expect($cache->has($cacheItem3->cacheKey()))->toBeFalse(); + expect($cache->has($cacheItem1->cacheKey()))->toBeFalse() + ->and($cache->has($cacheItem2->cacheKey()))->toBeFalse() + ->and($cache->has($cacheItem3->cacheKey()))->toBeFalse(); }); it('can still use legacy (pre CacheItem object) cache files', function () { @@ -235,11 +230,10 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $respondedRequest = RespondedRequest::fromArray($cacheItem); - expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class); - - expect($respondedRequest->requestedUri())->toBe( - 'https://www.crwlr.software/blog/dealing-with-http-url-query-strings-in-php', - ); + expect($respondedRequest)->toBeInstanceOf(RespondedRequest::class) + ->and($respondedRequest->requestedUri())->toBe( + 'https://www.crwlr.software/blog/dealing-with-http-url-query-strings-in-php', + ); }); it('compresses cache data when useCompression() is used', function () { @@ -270,15 +264,13 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $compressedFileSize = filesize(helper_cachedir() . '/' . $respondedRequest->cacheKey()); - expect($compressedFileSize)->not()->toBeFalse(); - /** @var int $uncompressedFileSize */ - expect($compressedFileSize)->toBeLessThan($uncompressedFileSize); - - // Didn't want to check for exact numbers, because I guess they could be a bit different on different systems. - // But thought the diff should at least be more than 30% for the test to succeed. - expect($uncompressedFileSize - $compressedFileSize)->toBeGreaterThan($uncompressedFileSize * 0.3); + expect($compressedFileSize)->not()->toBeFalse() + ->and($compressedFileSize)->toBeLessThan($uncompressedFileSize) + // Didn't want to check for exact numbers, because I guess they could be a bit different on different systems. + // But thought the diff should at least be more than 30% for the test to succeed. + ->and($uncompressedFileSize - $compressedFileSize)->toBeGreaterThan($uncompressedFileSize * 0.3); }); it('gets compressed cache items', function () { @@ -295,9 +287,8 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $retrievedCacheItem = $cache->get($respondedRequest->cacheKey()); - expect($retrievedCacheItem)->toBeInstanceOf(RespondedRequest::class); - - expect(Http::getBodyString($retrievedCacheItem))->toBe('Hello World'); + expect($retrievedCacheItem)->toBeInstanceOf(RespondedRequest::class) + ->and(Http::getBodyString($retrievedCacheItem))->toBe('Hello World'); }); it('is also able to decode uncompressed cache files when useCompression() is used', function () { @@ -355,11 +346,116 @@ function helper_respondedRequestWithRequestUrl(string $requestUrl): RespondedReq $cache->set($respondedRequest->cacheKey(), $respondedRequest); - $cacheFileContent = file_get_contents(helper_cachedir() . '/' . $respondedRequest->cacheKey()); + $cacheItem = helper_getCacheItemByKey($respondedRequest->cacheKey()); - $cacheItem = unserialize($cacheFileContent !== false ? $cacheFileContent : 'a:0:{}'); + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(900); +}); + +it('prolongs the time to live for a single item', function () { + $cache = new FileCache(helper_cachedir()); + + $cache->ttl(100); + + $respondedRequest = new RespondedRequest(new Request('GET', '/a'), new Response(body: Utils::streamFor('b'))); + + $cache->set($respondedRequest->cacheKey(), $respondedRequest); + + $cacheItem = helper_getCacheItemByKey($respondedRequest->cacheKey()); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(100); + + /** @var CacheItem $cacheItem */ + + $cache->prolong($cacheItem->key(), 200); + + $cacheItem = helper_getCacheItemByKey($cacheItem->key()); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(200); +}); + +it('prolongs the time to live for all items in the cache directory', function () { + $cache = new FileCache(helper_cachedir()); + + $respondedRequest = new RespondedRequest(new Request('GET', '/a'), new Response(body: Utils::streamFor('b'))); + + $cache->set($key1 = $respondedRequest->cacheKey(), $respondedRequest, 100); + + $respondedRequest = new RespondedRequest(new Request('GET', '/c'), new Response(body: Utils::streamFor('d'))); + + $cache->set($key2 = $respondedRequest->cacheKey(), $respondedRequest, 200); + + $respondedRequest = new RespondedRequest(new Request('GET', '/e'), new Response(body: Utils::streamFor('f'))); + + $cache->set($key3 = $respondedRequest->cacheKey(), $respondedRequest, 300); + + $cacheItem = helper_getCacheItemByKey($key1); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(100); + + $cacheItem = helper_getCacheItemByKey($key2); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(200); + + $cacheItem = helper_getCacheItemByKey($key3); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(300); + + $cache->prolongAll(250); + + $cacheItem = helper_getCacheItemByKey($key1); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(250); + + $cacheItem = helper_getCacheItemByKey($key2); + + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(250); + + $cacheItem = helper_getCacheItemByKey($key3); + + // Prolonging sets the provided value, no matter if an item's previous ttl value was + // higher than the new one. + expect($cacheItem)->toBeInstanceOf(CacheItem::class) + ->and($cacheItem?->ttl)->toBe(250); +}); + +test('the get() and has() methods delete an expired item, but prolong does not', function () { + $cache = new FileCache(helper_cachedir()); + + $resp = new RespondedRequest(new Request('GET', '/'), new Response()); + + // with get() + $cacheItem = new CacheItem($resp, $resp->cacheKey(), 10, (new DateTimeImmutable())->sub(new DateInterval('PT11S'))); + + $cache->set($cacheItem->key(), $cacheItem); + + $cacheItem = $cache->get($cacheItem->key()); + + expect($cacheItem)->toBeNull() + ->and(file_exists(helper_cachedir($resp->cacheKey())))->toBeFalse(); + + // with has() + $cacheItem = new CacheItem($resp, $resp->cacheKey(), 10, (new DateTimeImmutable())->sub(new DateInterval('PT11S'))); + + $cache->set($cacheItem->key(), $cacheItem); + + $cache->has($cacheItem->key()); + + expect($cache->has($cacheItem->key()))->toBeFalse() + ->and(file_exists(helper_cachedir($cacheItem->key())))->toBeFalse(); + + // with prolong() + $cache->set($cacheItem->key(), $cacheItem); - expect($cacheItem)->toBeInstanceOf(CacheItem::class); + $cache->prolong($cacheItem->key(), 20); - expect($cacheItem?->ttl)->toBe(900); + expect($cache->has($cacheItem->key()))->toBeTrue() + ->and(file_exists(helper_cachedir($cacheItem->key())))->toBeTrue(); }); diff --git a/tests/Pest.php b/tests/Pest.php index fd959567..43407750 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -321,9 +321,15 @@ function helper_getRespondedRequest( return new RespondedRequest($request, $response); } -function helper_cachedir(): string +function helper_cachedir(?string $inDir = null): string { - return __DIR__ . '/_Temp/_cachedir'; + $path = __DIR__ . '/_Temp/_cachedir'; + + if ($inDir !== null) { + return $path . (str_starts_with($inDir, '/') ? $inDir : '/' . $inDir); + } + + return $path; } function helper_resetCacheDir(): void diff --git a/tests/_Stubs/DummyLogger.php b/tests/_Stubs/DummyLogger.php index 7a7b723a..ce807875 100644 --- a/tests/_Stubs/DummyLogger.php +++ b/tests/_Stubs/DummyLogger.php @@ -54,6 +54,10 @@ public function debug(string|Stringable $message, array $context = []): void $this->log('debug', $message, $context); } + /** + * @param string $level + * @param mixed[] $context + */ public function log($level, string|Stringable $message, array $context = []): void { if (!is_string($level)) {