From d6050a3c9d26ba177d3233b22bd7300ac78f94a7 Mon Sep 17 00:00:00 2001
From: Haresh Kainth
Date: Tue, 10 Dec 2024 10:34:58 +0000
Subject: [PATCH] chore: Handle failed URL fetches in legislation cache
 building

Log errors for failed URL fetches and record the failing URLs in a list
instead of raising, so a single bad source no longer aborts the cache
build. After the run, a summary warning lists every source that could
not be fetched.
---
 app/cache/legislation.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/app/cache/legislation.py b/app/cache/legislation.py
index de2e19b..152c2ac 100644
--- a/app/cache/legislation.py
+++ b/app/cache/legislation.py
@@ -123,6 +123,8 @@ def build_cache(self, config: SearchDocumentConfig):
         logger.info("building legislation cache...")
         dataset = construction_legislation_dataframe()
 
+        failed_url_fetches = []
+
         # For each row, get the URL from the column named
         # 'URI to Extract XML Data'
         # and store the XML data in a list
@@ -185,10 +187,15 @@
                 # Insert or update the document
                 insert_or_update_document(document_json)
             except Exception as e:
                 logger.error(f"error fetching data from {url}: {e}")
-                raise e
+                failed_url_fetches.append(url)
 
+        if failed_url_fetches:
+            logger.warning(
+                f"failed to fetch data from {len(failed_url_fetches)} "
+                f"legislation sources: {failed_url_fetches}"
+            )
 
     def _to_json(
         self,
         description,
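
Note: if the upstream legislation service rate-limits bulk fetching, a
small retry-with-backoff helper could complement the failure list,
retrying transient errors before a URL is recorded as failed. Below is a
minimal sketch, assuming a requests-based HTTP client (this patch does
not show how the fetch is performed); the helper name fetch_with_retry
and the retry, backoff, and timeout values are illustrative, not part of
the patch.

    import logging
    import time

    import requests

    logger = logging.getLogger(__name__)


    def fetch_with_retry(url: str, retries: int = 3, backoff: float = 0.5) -> str:
        """Fetch a URL, backing off between attempts to soften rate limiting."""
        for attempt in range(1, retries + 1):
            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                return response.text
            except requests.RequestException as e:
                logger.error(
                    "attempt %d/%d failed for %s: %s", attempt, retries, url, e
                )
                if attempt == retries:
                    # Out of retries: let the caller's except block
                    # record the URL as failed.
                    raise
                # Exponential backoff between attempts: 0.5s, 1.0s, 2.0s, ...
                time.sleep(backoff * 2 ** (attempt - 1))

On the final attempt the exception propagates, so the surrounding
try/except in build_cache would still append the URL to
failed_url_fetches exactly as the patch does for a single failed fetch.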