From 9766d7febb5e25d8e1bce622da22ce23e6fd06df Mon Sep 17 00:00:00 2001
From: Daimona Eaytoy
Date: Thu, 15 Jul 2021 14:47:59 +0200
Subject: [PATCH] Improve cache key generation based on image URLs

The protocol is irrelevant; for the size, we can tolerate small
differences, and this way we can prefetch the next image for on-wiki
requests.

Bug: T286356
---
 src/Controller/OcrController.php | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/Controller/OcrController.php b/src/Controller/OcrController.php
index 33e7703..7d4f546 100644
--- a/src/Controller/OcrController.php
+++ b/src/Controller/OcrController.php
@@ -225,7 +225,7 @@ private function getText(): string
         $cacheKey = md5(implode(
             '|',
             [
-                $this->imageUrl,
+                self::transformImageURLForCacheKey($this->imageUrl),
                 static::$params['engine'],
                 implode('|', static::$params['langs']),
                 static::$params['psm'],
@@ -237,4 +237,31 @@ private function getText(): string
             return $this->engine->getText($this->imageUrl, static::$params['langs']);
         });
     }
+
+    /**
+     * Make an image URL suitable to be used as a cache key.
+     *
+     * A leading "http:" or "https:" is stripped, and the width in any "page<N>-<W>px"
+     * segment is rounded to the nearest multiple of 100, so that URLs differing only
+     * in those respects produce the same key.
+     *
+     * @param string $url
+     * @return string Normalized URL, only meant to be hashed into the cache key.
+     */
+    private static function transformImageURLForCacheKey(string $url): string
+    {
+        // preg_replace() and preg_replace_callback() return null on PCRE failure, which would
+        // violate the string return type; fall back to the last good value instead.
+        $withoutProtocol = preg_replace('/^https?:/i', '', $url) ?? $url;
+
+        $rounded = preg_replace_callback(
+            '/(page\d+-)(\d+)px/',
+            static function (array $matches): string {
+                // Tolerate ±50px, see T286356.
+                return $matches[1].( round($matches[2] / 100) * 100 ).'px';
+            },
+            $withoutProtocol
+        );
+
+        return $rounded ?? $withoutProtocol;
+    }
 }