Skip to content

Commit

Permalink
Merge pull request #5090 from braykuka/in-events-scraper-fails-on-par…
Browse files Browse the repository at this point in the history
…sing-api-response

IN Events: scraper fails on parsing the API response
  • Loading branch information
jessemortenson authored Nov 20, 2024
2 parents 6bb5cbb + 85965ec commit 4590e04
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
6 changes: 5 additions & 1 deletion scrapers/in/apiclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,15 @@ def get_session_no(self, session):
headers["Accept"] = "application/json"
headers["User-Agent"] = self.user_agent
url = urljoin(self.root, f"/{session}")

resp = requests.get(url, headers=headers).json()
if "message" in resp:
raise Exception(resp["message"])
session_no_regex = re.search(r"Session\s+(\d+).+", resp["name"])

if session_no_regex:
session_no = session_no_regex.group(1)
else:
raise Exception("Invalid session")

return session_no

Expand Down
4 changes: 2 additions & 2 deletions scrapers/in/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytz
from openstates.scrape import Scraper, Event
from .apiclient import ApiClient
from .utils import add_space
from .utils import add_space, backoff
from openstates.exceptions import EmptyScrape


Expand All @@ -21,7 +21,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def scrape(self):
session_no = self.apiclient.get_session_no(self.session)
session_no = backoff(self.apiclient.get_session_no, self.session)
response = self.apiclient.get("meetings", session=self.session)

meetings = response["meetings"]
Expand Down
26 changes: 26 additions & 0 deletions scrapers/in/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import requests
import logging
import time

logging.getLogger("IN").setLevel(logging.WARNING)
log = logging.getLogger("openstates")


def get_with_increasing_timeout(scraper, link, fail=False, kwargs={}):
Expand Down Expand Up @@ -57,3 +62,24 @@ def add_space(text):
new_string = f"{alpha} {number}"

return new_string


def backoff(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, retrying with a linear back-off.

    The call is attempted up to five times. Before every attempt there is a
    one-second pause. When an attempt raises, a warning is logged and, if
    further attempts remain, execution waits ``(attempt + 1) * 15`` seconds
    (15, 30, 45, 60) before trying again.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns on its first successful attempt.

    Raises:
        Exception: If every attempt raised. The exception from the final
            attempt is attached as ``__cause__``.
    """
    retries = 5
    last_error = None

    for attempt in range(retries):
        # Short pause before every call, including the first one.
        time.sleep(1)
        try:
            return function(*args, **kwargs)
        except Exception as e:
            last_error = e
            if attempt == retries - 1:
                # No retry follows, so waiting here would only delay the
                # final error being reported.
                log.warning(
                    "[attempt %s]: %s. No retries left.", attempt, e
                )
                break
            delay = (attempt + 1) * 15
            log.warning(
                "[attempt %s]: %s. Backing off for %s seconds.",
                attempt,
                e,
                delay,
            )
            time.sleep(delay)

    # Chain the last failure so the real cause is visible in the traceback.
    raise Exception(
        "INDIANA API returns still None. Please confirm API status."
    ) from last_error

0 comments on commit 4590e04

Please sign in to comment.