Skip to content

Commit

Permalink
add backoff function
Browse files Browse the repository at this point in the history
  • Loading branch information
braykuka committed Nov 20, 2024
1 parent 71e923e commit 85965ec
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
17 changes: 8 additions & 9 deletions scrapers/in/apiclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,14 @@ def get_session_no(self, session):
headers["User-Agent"] = self.user_agent
url = urljoin(self.root, f"/{session}")

while not session_no:
resp = requests.get(url, headers=headers)
resp = resp.json()
if "message" in resp:
self.scraper.warning(resp["message"])
continue
session_no_regex = re.search(r"Session\s+(\d+).+", resp["name"])
if session_no_regex:
session_no = session_no_regex.group(1)
resp = requests.get(url, headers=headers).json()
if "message" in resp:
raise Exception(resp["message"])
session_no_regex = re.search(r"Session\s+(\d+).+", resp["name"])
if session_no_regex:
session_no = session_no_regex.group(1)
else:
raise Exception("Invalid session")

return session_no

Expand Down
4 changes: 2 additions & 2 deletions scrapers/in/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytz
from openstates.scrape import Scraper, Event
from .apiclient import ApiClient
from .utils import add_space
from .utils import add_space, backoff
from openstates.exceptions import EmptyScrape


Expand All @@ -21,7 +21,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def scrape(self):
session_no = self.apiclient.get_session_no(self.session)
session_no = backoff(self.apiclient.get_session_no, self.session)
response = self.apiclient.get("meetings", session=self.session)

meetings = response["meetings"]
Expand Down
26 changes: 26 additions & 0 deletions scrapers/in/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import requests
import logging
import time

# Raise the threshold of the "IN" logger to WARNING so lower-severity records
# emitted through it are dropped.
logging.getLogger("IN").setLevel(logging.WARNING)
# Module-level logger (named "openstates") used by the helpers in this file,
# e.g. backoff() reports each failed attempt through it.
log = logging.getLogger("openstates")


def get_with_increasing_timeout(scraper, link, fail=False, kwargs={}):
Expand Down Expand Up @@ -57,3 +62,24 @@ def add_space(text):
new_string = f"{alpha} {number}"

return new_string


def backoff(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, retrying with a growing delay.

    The callable is tried up to five times.  Every call is preceded by a
    one-second pause.  When a call raises, the error is logged as a warning
    and, if attempts remain, the helper sleeps ``(attempt + 1) * 15`` seconds
    (15, 30, 45, 60) before trying again.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns on its first successful call.

    Raises:
        Exception: If every attempt raised.  The last underlying error is
            attached as ``__cause__`` so the real failure stays visible in
            the traceback.
    """
    retries = 5
    last_error = None

    for attempt in range(retries):
        # One-second pause before every call, including the very first one.
        time.sleep(1)
        try:
            return function(*args, **kwargs)
        except Exception as e:
            last_error = e
            if attempt == retries - 1:
                # Nothing left to retry: do not waste a full back-off delay
                # sleeping right before giving up.
                log.warning("[attempt %s]: %s. No retries left.", attempt, e)
                break
            # Named ``delay`` (not ``backoff``) to avoid shadowing this
            # function's own name.
            delay = (attempt + 1) * 15
            log.warning(
                "[attempt %s]: %s. Backing off for %s seconds.",
                attempt,
                e,
                delay,
            )
            time.sleep(delay)

    raise Exception(
        "INDIANA API returns still None. Please confirm API status."
    ) from last_error

0 comments on commit 85965ec

Please sign in to comment.