From 294cf6c624a9848c2144648d0436a478c46799cf Mon Sep 17 00:00:00 2001 From: Her Email Date: Mon, 4 Dec 2023 09:11:32 -0500 Subject: [PATCH] limit frequent fetch --- catalog/api.py | 5 +++-- catalog/search/models.py | 18 +++++++++++++----- catalog/search/views.py | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/catalog/api.py b/catalog/api.py index e28db8cb..43f5f9f6 100644 --- a/catalog/api.py +++ b/catalog/api.py @@ -67,8 +67,9 @@ def fetch_item(request, url: str): Convert a URL from a supported site (e.g. https://m.imdb.com/title/tt2852400/) to an item. If the item is not available in the catalog, HTTP 202 will be returned. - Wait 10 seconds or longer, call with same input again, it may return the actual fetched item. + Wait 15 seconds or longer, call with same input again, it may return the actual fetched item. Some site may take ~90 seconds to fetch. + If not getting the item after 120 seconds, please stop and consider the URL is not available. """ site = SiteManager.get_site_by_url(url) if not site: @@ -76,7 +77,7 @@ def fetch_item(request, url: str): item = site.get_item() if item: return 200, item - if get_fetch_lock(): + if get_fetch_lock(request.user, url): enqueue_fetch(url, False) return 202, {"message": "Fetch in progress"} diff --git a/catalog/search/models.py b/catalog/search/models.py index ab468620..a7653101 100644 --- a/catalog/search/models.py +++ b/catalog/search/models.py @@ -109,11 +109,19 @@ def query_index(keywords, categories=None, tag=None, page=1, prepare_external=Tr return items, result.num_pages, result.count, duplicated_items -_fetch_lock_key = "_fetch_lock" -_fetch_lock_ttl = 2 - - -def get_fetch_lock(): +def get_fetch_lock(user, url): + if user and user.is_authenticated: + _fetch_lock_key = f"_fetch_lock:{user.id}" + _fetch_lock_ttl = 1 if settings.DEBUG else 3 + else: + _fetch_lock_key = "_fetch_lock" + _fetch_lock_ttl = 1 if settings.DEBUG else 15 + if cache.get(_fetch_lock_key): + return False + cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl) + # do not fetch the same url twice in 2 hours + _fetch_lock_key = f"_fetch_lock:{url}" + _fetch_lock_ttl = 1 if settings.DEBUG else 7200 if cache.get(_fetch_lock_key): return False cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl) diff --git a/catalog/search/views.py b/catalog/search/views.py index 605fc19c..501273b7 100644 --- a/catalog/search/views.py +++ b/catalog/search/views.py @@ -61,7 +61,7 @@ def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = No } ) job_id = None - if is_refetch or get_fetch_lock(): + if is_refetch or get_fetch_lock(request.user, url): job_id = enqueue_fetch(url, is_refetch, request.user) return render( request,