4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
exclude_types: ['markdown']
@@ -21,7 +21,7 @@ repos:
# Suppress SyntaxWarning about invalid escape sequence from calitp-data-infra dependency without modifying source
entry: env PYTHONWARNINGS="ignore::SyntaxWarning" flake8
- repo: https://github.com/psf/black
rev: 23.1.0
rev: 25.9.0
hooks:
- id: black
args: ["--config=./pyproject.toml"]
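Aside from the hook version bumps above, every other change in this PR appears to be mechanical reformatting from the newer black: conditional expressions used as argument values, dict values, or lambda bodies gain their own wrapping parentheses, and the right-hand side of long assignments is parenthesized rather than split at if/else. A quick way to preview the effect locally is black's documented Python API; the snippet below is a rough sketch (the sample line and the env name are invented, and the exact wrapping depends on the configured line length):

import black

# Invented one-liner resembling the kind of expression reformatted throughout this PR.
sample = 'cfg = dict(tag="development" if env == "development" else "latest")\n'

# With black >= 24 the conditional is wrapped in its own parentheses; the 23.x
# style split it at if/else with no parentheses, as on the old side of the
# diffs below.
print(black.format_str(sample, mode=black.Mode(line_length=50)))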
28 changes: 16 additions & 12 deletions airflow/dags/airtable_loader_v2/generate_gtfs_download_configs.py
@@ -49,18 +49,22 @@ def gtfs_datasets_to_extract_configs(
name=record.name,
url=record.pipeline_url,
feed_type=record.data,
auth_query_params={
record.authorization_url_parameter_name: record.url_secret_key_name
}
if record.authorization_url_parameter_name
and record.url_secret_key_name
else {},
auth_headers={
record.authorization_header_parameter_name: record.header_secret_key_name
}
if record.authorization_header_parameter_name
and record.header_secret_key_name
else {},
auth_query_params=(
{
record.authorization_url_parameter_name: record.url_secret_key_name
}
if record.authorization_url_parameter_name
and record.url_secret_key_name
else {}
),
auth_headers=(
{
record.authorization_header_parameter_name: record.header_secret_key_name
}
if record.authorization_header_parameter_name
and record.header_secret_key_name
else {}
),
),
)
except ValidationError as e:
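Functionally the auth_query_params and auth_headers values are unchanged: each becomes a single-entry dict only when both the parameter name and the secret key name are present on the Airtable record, and an empty dict otherwise. A standalone sketch of that pattern, with an invented record type in place of the real Airtable row:

from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class FakeRecord:
    # Stand-in for the Airtable-backed record used by the real DAG.
    authorization_url_parameter_name: Optional[str] = None
    url_secret_key_name: Optional[str] = None


def auth_query_params(record: FakeRecord) -> Dict[str, str]:
    # Emit the mapping only when both halves are configured.
    return (
        {record.authorization_url_parameter_name: record.url_secret_key_name}
        if record.authorization_url_parameter_name and record.url_secret_key_name
        else {}
    )


assert auth_query_params(FakeRecord()) == {}
assert auth_query_params(
    FakeRecord(
        authorization_url_parameter_name="api_key", url_secret_key_name="MY_KEY"
    )
) == {"api_key": "MY_KEY"}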
8 changes: 5 additions & 3 deletions airflow/dags/dags.py
@@ -55,9 +55,11 @@ def log_failure_to_slack(context):
wait_for_defaults={"retries": 24, "check_existence": True, "timeout": 10 * 60},
latest_only=False,
user_defined_macros={
"image_tag": lambda: "development"
if os.environ["AIRFLOW_ENV"] == "development"
else "latest",
"image_tag": lambda: (
"development"
if os.environ["AIRFLOW_ENV"] == "development"
else "latest"
),
"env_var": os.environ.get,
},
default_args={
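The image_tag entry stays a lambda so the environment lookup happens at template-render time rather than at DAG-parse time; user_defined_macros entries are exposed to Jinja, so a templated field would presumably invoke it as a callable. A minimal hedged sketch, with an invented DAG and task (the real project wires these defaults through its DAG factory):

import pendulum
from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG(
    dag_id="image_tag_macro_demo",  # invented name, for illustration only
    start_date=pendulum.datetime(2024, 1, 1),
    schedule=None,  # Airflow 2.4+ spelling; older versions use schedule_interval
    user_defined_macros={"image_tag": lambda: "latest"},
):
    BashOperator(
        task_id="echo_image_tag",
        # Callables in user_defined_macros are invoked inside the template.
        bash_command="echo 'image tag is {{ image_tag() }}'",
    )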
1 change: 1 addition & 0 deletions airflow/plugins/operators/external_table.py
@@ -7,6 +7,7 @@
However, it's cumbersome to convert the http api style schema fields to SQL, so
we provide a fallback for these old-style tasks.
"""

from google.api_core.exceptions import NotFound
from google.cloud import bigquery
from utils import CALITP_BQ_LOCATION, CALITP_PROJECT_NAME
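In this file (and several below) the only change is a blank line inserted between the module docstring and the first import, which appears to be part of black's post-2024 stable style rather than a hand edit. A tiny illustration with a made-up module:

# black 23.x accepted an import directly after the module docstring:
"""Hypothetical module docstring."""
import os

# The bumped black enforces a blank line after the docstring:
"""Hypothetical module docstring."""

import os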
13 changes: 7 additions & 6 deletions airflow/plugins/scripts/gtfs_rt_parser.py
@@ -1,6 +1,7 @@
"""
Parses binary RT feeds and writes them back to GCS as gzipped newline-delimited JSON
"""

import base64
import copy
import datetime
@@ -414,9 +415,9 @@ def where_base64url(self, base64_url: Optional[str]):
def get_aggregates(
self,
) -> List[RTHourlyAggregation]:
aggregates: Dict[
Tuple[pendulum.DateTime, str], List[GTFSRTFeedExtract]
] = defaultdict(list)
aggregates: Dict[Tuple[pendulum.DateTime, str], List[GTFSRTFeedExtract]] = (
defaultdict(list)
)

for file in self.files:
if self.base64_url is None or file.base64_url == self.base64_url:
@@ -892,9 +893,9 @@ def parse_and_validate(

def make_dict_bq_safe(d: Dict[str, Any]) -> Dict[str, Any]:
return {
make_name_bq_safe(key): make_dict_bq_safe(value)
if isinstance(value, dict)
else value
make_name_bq_safe(key): (
make_dict_bq_safe(value) if isinstance(value, dict) else value
)
for key, value in d.items()
}

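For reference, make_dict_bq_safe recursively rewrites nested dictionary keys (presumably so they are valid BigQuery column names); only its formatting changes here. A rough standalone sketch of the behavior, where the key-sanitizing rule is a simplified stand-in rather than the project's actual make_name_bq_safe:

import re
from typing import Any, Dict


def make_name_bq_safe(name: str) -> str:
    # Simplified stand-in: replace anything BigQuery would reject with an underscore.
    return re.sub(r"[^a-zA-Z0-9_]", "_", name)


def make_dict_bq_safe(d: Dict[str, Any]) -> Dict[str, Any]:
    # Sanitize every key, recursing into nested dicts but leaving other values alone.
    return {
        make_name_bq_safe(key): (
            make_dict_bq_safe(value) if isinstance(value, dict) else value
        )
        for key, value in d.items()
    }


assert make_dict_bq_safe({"stop id": {"trip-id": 1}}) == {"stop_id": {"trip_id": 1}}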
@@ -2,6 +2,7 @@
Utility functions for geospatial data.
Some functions for dealing with census tract or other geographic unit dfs.
"""

from typing import Literal, Union, cast

import dask.dataframe as dd
@@ -24,6 +24,7 @@
https://github.com/CityOfLosAngeles/los-angeles-citywide-data-style

"""

import altair as alt # type: ignore
from calitp_data_analysis import calitp_color_palette as cp

@@ -1,6 +1,7 @@
"""
General utility functions.
"""

import base64
import os
import shutil
6 changes: 3 additions & 3 deletions packages/calitp-data-infra/calitp_data_infra/storage.py
@@ -643,9 +643,9 @@ def build_request(self, auth_dict: Mapping[str, str]) -> Request:
headers = {k: auth_dict[v] for k, v in self.auth_headers.items()}

# some web servers require user agents or they will throw a 4XX error
headers[
"User-Agent"
] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0"
headers["User-Agent"] = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0"
)

# inspired by: https://stackoverflow.com/questions/18869074/create-url-without-request-execution
return Request(
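The surrounding build_request logic resolves auth_headers (a header-name to secret-key-name mapping, as configured in the Airtable loader above) against a dict of secret values, then forces a browser-like User-Agent because some servers reject requests without one. A hedged, self-contained sketch of that resolution step, with invented function and variable names:

from typing import Dict, Mapping


def resolve_headers(
    auth_headers: Mapping[str, str], auth_dict: Mapping[str, str]
) -> Dict[str, str]:
    # Look up each configured secret key name in the secrets mapping.
    headers = {k: auth_dict[v] for k, v in auth_headers.items()}
    # Some web servers return 4XX without a user agent, so always set one.
    headers["User-Agent"] = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0"
    )
    return headers


resolved = resolve_headers({"Authorization": "MY_TOKEN"}, {"MY_TOKEN": "abc123"})
assert resolved["Authorization"] == "abc123"
assert "User-Agent" in resolved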
7 changes: 4 additions & 3 deletions warehouse/scripts/dbt_artifacts/__init__.py
@@ -1,6 +1,7 @@
"""
Built off the starting point of https://guitton.co/posts/dbt-artifacts
"""

import abc
import os
from enum import Enum
@@ -64,9 +65,9 @@ def num_bytes(self) -> Optional[int]:
CatalogTable.num_bytes = property(num_bytes) # type: ignore[attr-defined]

DependsOn.resolved_nodes = property( # type: ignore[attr-defined]
lambda self: [NodeModelMixin._instances[node] for node in self.nodes]
if self.nodes
else []
lambda self: (
[NodeModelMixin._instances[node] for node in self.nodes] if self.nodes else []
)
)
ColumnInfo.publish = property(lambda self: self.meta.get("publish.include", False)) # type: ignore[attr-defined]

6 changes: 3 additions & 3 deletions warehouse/scripts/dbt_artifacts/manifest.py
@@ -24,9 +24,9 @@ class ManifestMetadata(BaseModel):
class Config:
extra = Extra.allow

dbt_schema_version: Optional[
str
] = "https://schemas.getdbt.com/dbt/manifest/v9.json"
dbt_schema_version: Optional[str] = (
"https://schemas.getdbt.com/dbt/manifest/v9.json"
)
dbt_version: Optional[str] = "1.6.0a1"
generated_at: Optional[datetime] = "2023-04-21T11:09:06.496436Z"
invocation_id: Optional[str] = "c4b245be-8edb-4ad7-ba54-9337ce594f5d"
7 changes: 4 additions & 3 deletions warehouse/scripts/publish.py
@@ -3,6 +3,7 @@

TODO: consider using https://github.com/ckan/ckanapi?
"""

import csv
import enum
import functools
@@ -528,9 +529,9 @@ def _publish_exposure(
["geometry_to_publish"] + destination.metadata_columns
]
gdf.to_file(geojsonl_fpath, driver="GeoJSONSeq")
layer_geojson_paths[
strip_modelname(node.name).title()
] = geojsonl_fpath
layer_geojson_paths[strip_modelname(node.name).title()] = (
geojsonl_fpath
)
hive_path = destination.hive_path(
exposure=exposure,
model=strip_modelname(node.name),
1 change: 1 addition & 0 deletions warehouse/scripts/visualize.py
@@ -1,6 +1,7 @@
"""
Provide more visualizations than what dbt provides.
"""

import json
import os
import webbrowser
1 change: 1 addition & 0 deletions warehouse/seeds/transit_facilities_to_csv.py
@@ -1,6 +1,7 @@
"""
This script reads a GeoJSON file containing transit facility data, processes it to standardize column names
and formats, and then exports the relevant data to a CSV file for import as a seed."""

import json

import geopandas as gpd