Skip to content

Commit

Permalink
limit frequent fetch
Browse files Browse the repository at this point in the history
  • Loading branch information
Her Email authored and Their Name committed Dec 4, 2023
1 parent def23f8 commit b9340a5
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 8 deletions.
5 changes: 3 additions & 2 deletions catalog/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,17 @@ def fetch_item(request, url: str):
Convert a URL from a supported site (e.g. https://m.imdb.com/title/tt2852400/) to an item.
If the item is not available in the catalog, HTTP 202 will be returned.
Wait 10 seconds or longer, call with same input again, it may return the actual fetched item.
Wait 15 seconds or longer, call with same input again, it may return the actual fetched item.
Some sites may take ~90 seconds to fetch.
If the item is still not returned after 120 seconds, please stop retrying and consider the URL unavailable.
"""
site = SiteManager.get_site_by_url(url)
if not site:
return 404, {"message": "URL not supported"}
item = site.get_item()
if item:
return 200, item
if get_fetch_lock():
if get_fetch_lock(request.user, url):
enqueue_fetch(url, False)
return 202, {"message": "Fetch in progress"}

Expand Down
18 changes: 13 additions & 5 deletions catalog/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,19 @@ def query_index(keywords, categories=None, tag=None, page=1, prepare_external=Tr
return items, result.num_pages, result.count, duplicated_items


_fetch_lock_key = "_fetch_lock"
_fetch_lock_ttl = 2


def get_fetch_lock():
def get_fetch_lock(user, url):
if user and user.is_authenticated:
_fetch_lock_key = f"_fetch_lock:{user.id}"
_fetch_lock_ttl = 1 if settings.DEBUG else 3
else:
_fetch_lock_key = "_fetch_lock"
_fetch_lock_ttl = 1 if settings.DEBUG else 15
if cache.get(_fetch_lock_key):
return False
cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl)
# do not fetch the same url twice in 2 hours
_fetch_lock_key = f"_fetch_lock:{url}"
_fetch_lock_ttl = 1 if settings.DEBUG else 7200
if cache.get(_fetch_lock_key):
return False
cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl)
Expand Down
2 changes: 1 addition & 1 deletion catalog/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = No
}
)
job_id = None
if is_refetch or get_fetch_lock():
if is_refetch or get_fetch_lock(request.user, url):
job_id = enqueue_fetch(url, is_refetch, request.user)
return render(
request,
Expand Down

0 comments on commit b9340a5

Please sign in to comment.