From 0ca4a89c311d9da726e150017b39610a2523cb3a Mon Sep 17 00:00:00 2001 From: Charles Costanzo Date: Fri, 22 Nov 2024 14:08:34 -0500 Subject: [PATCH] revisions based on Mjumbe's review --- airflow/plugins/operators/scrape_state_geoportal.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/airflow/plugins/operators/scrape_state_geoportal.py b/airflow/plugins/operators/scrape_state_geoportal.py index 4d94619d0b..a538df5495 100644 --- a/airflow/plugins/operators/scrape_state_geoportal.py +++ b/airflow/plugins/operators/scrape_state_geoportal.py @@ -1,7 +1,6 @@ import gzip import logging - -# import os +import os from typing import ClassVar, List import pandas as pd # type: ignore @@ -12,8 +11,7 @@ from airflow.models import BaseOperator # type: ignore -API_BUCKET = "gs://calitp-state-geoportal-scrape" -# API_BUCKET = os.environ["CALITP_BUCKET__STATE_GEOPORTAL_DATA_PRODUCTS"] +API_BUCKET = os.environ["CALITP_BUCKET__STATE_GEOPORTAL_DATA_PRODUCTS"] class StateGeoportalAPIExtract(PartitionedGCSArtifact): @@ -88,7 +86,8 @@ def fetch_from_state_geoportal(self): params["resultOffset"] = offset # Make the request - response = requests.get(validated_url, params=params).raise_for_status() + response = requests.get(validated_url, params=params) + response.raise_for_status() data = response.json() # Break the loop if there are no more features @@ -187,7 +186,7 @@ def execute(self, **kwargs): df = pd.json_normalize(api_content) if self.product == "state_highway_network": - # Select columns to keep, have to be explicit because there are duplicate values after normalizing + # Select columns to keep, have to be explicit before renaming because there are duplicate values after normalizing df = df[ [ "properties.Route",