def backoff(function, *args, **kwargs):
    """Call ``function(*args, **kwargs)``, retrying with a growing delay.

    The callable is tried up to five times. Every call is preceded by a
    one-second pause. After a failed attempt that will be retried, the
    error is logged and execution waits ``(attempt + 1) * 15`` seconds
    (15, 30, 45, 60) before trying again.

    Args:
        function: The callable to invoke.
        *args: Positional arguments forwarded to ``function``.
        **kwargs: Keyword arguments forwarded to ``function``.

    Returns:
        Whatever ``function`` returns on its first successful call.

    Raises:
        Exception: If every attempt raised. The exception from the last
            attempt is attached as ``__cause__``.
    """
    retries = 5
    last_error = None

    for attempt in range(retries):
        # Short pause before every call, including the first one.
        time.sleep(1)
        try:
            return function(*args, **kwargs)
        except Exception as e:
            last_error = e
            if attempt == retries - 1:
                # No retry follows, so sleeping here would only delay
                # the failure report.
                log.warning("[attempt %s]: %s. Giving up.", attempt, e)
                break
            delay = (attempt + 1) * 15
            # Lazy %-args: the message is only formatted if it is emitted.
            log.warning(
                "[attempt %s]: %s. Backing off for %s seconds.",
                attempt,
                e,
                delay,
            )
            time.sleep(delay)

    # Chain the last failure so the real cause appears in the traceback.
    raise Exception(
        "INDIANA API returns still None. Please confirm API status."
    ) from last_error