Skip to content

Commit 5ee7644

Browse files
NTD: iterative mart cleanup, consistency, enrichment (#3870)
* NTD: correctly map schemas in dbt_project.yml * feat: update intermediate schema mapping in dbt_project.yml * feat: add intermediate pivot tables for ntd assets, carry through to marts * feat: create intermediate yml file for ntd assets * feat: add intermediate yml file for funding and expenses * feat: add new intermediate schema mapping to dbt_project.yml for ntd_annual_reporting * feat: new intermediate union table for ntd_annual_reporting contractual relationships over multiple years * feat: appropriately alias ntd_assets mart tables * feat: remove single-year mart tables * feat: create intermediate table for agency information, which does the work of the analyst agency mart table * feat: point analyst mart agency table at intermediate table as view * feat: point new ntd_annual_reporting schema agency dim at new intermediate table * feat: new unioned contractual relationships table * feat: last two intermediate tables to unpivot, new preferred column order for marts * feat: normalize all uses of max_ and _5_digit_ntd_id across the mart tables, slight reordering for use * feat: consistency around agency_name * Fix column documentation for dim_agency_information --------- Co-authored-by: Erika Pacheco <[email protected]>
1 parent bf63cae commit 5ee7644

File tree

93 files changed

+1385
-1036
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+1385
-1036
lines changed

warehouse/dbt_project.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ models:
5959
+labels:
6060
domain: staging
6161
dataset: ntd_annual_reporting
62+
ntd_assets:
63+
+labels:
64+
domain: staging
65+
dataset: ntd_assets
66+
ntd_funding_and_expenses:
67+
+labels:
68+
domain: staging
69+
dataset: ntd_funding_and_expenses
6270
ntd_ridership:
6371
+labels:
6472
domain: staging
@@ -103,6 +111,18 @@ models:
103111
+labels:
104112
domain: intermediate
105113
dataset: ntd
114+
ntd_assets:
115+
+labels:
116+
domain: intermediate
117+
dataset: ntd_assets
118+
ntd_annual_reporting:
119+
+labels:
120+
domain: intermediate
121+
dataset: ntd_annual_reporting
122+
ntd_funding_and_expenses:
123+
+labels:
124+
domain: intermediate
125+
dataset: ntd_funding_and_expenses
106126
ntd_validation:
107127
+labels:
108128
domain: intermediate
@@ -153,6 +173,12 @@ models:
153173
domain: mart
154174
dataset: ntd_annual_reporting
155175
schema: mart_ntd_annual_reporting
176+
ntd_assets:
177+
+materialized: table
178+
+labels:
179+
domain: mart
180+
dataset: ntd_assets
181+
schema: mart_ntd_assets
156182
ntd_funding_and_expenses:
157183
+materialized: table
158184
+labels:
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
-- Stacks the 2022 and 2023 NTD agency information staging models into a
-- single relation. Every row is tagged with its report year, a surrogate key
-- built from (year, ntd_id, state_parent_ntd_id, execution_ts), and the
-- validity columns _valid_from, _valid_to and _is_current.

{# Years are emitted in this order, so the 2023 SELECT stays first in the UNION ALL. #}
{% set report_years = [2023, 2022] %}

WITH agency_info_2022 AS (
    SELECT
        *,
        -- TODO: this does not handle deletes
        LEAD(execution_ts) OVER (
            PARTITION BY ntd_id, state_parent_ntd_id
            ORDER BY execution_ts ASC
        ) AS next_ts,
    FROM {{ ref('stg_ntd__2022_agency_information') }}
),

agency_info_2023 AS (
    SELECT
        *,
        -- TODO: this does not handle deletes
        LEAD(execution_ts) OVER (
            PARTITION BY ntd_id, state_parent_ntd_id
            ORDER BY execution_ts ASC
        ) AS next_ts,
    FROM {{ ref('stg_ntd__2023_agency_information') }}
),

int_ntd__unioned_agency_information AS (
    {% for report_year in report_years %}
    SELECT
        {{ dbt_utils.generate_surrogate_key([report_year, 'ntd_id', 'state_parent_ntd_id', 'execution_ts']) }} AS key,
        {{ report_year }} AS year,
        ntd_id,
        state_parent_ntd_id,
        agency_name,
        reporter_acronym,
        doing_business_as,
        {% if report_year == 2022 %}
        -- The 2022 branch emits NULL for this column instead of selecting it.
        NULL AS division_department,
        {% else %}
        division_department,
        {% endif %}
        legacy_ntd_id,
        reported_by_ntd_id,
        reported_by_name,
        reporter_type,
        reporting_module,
        organization_type,
        subrecipient_type,
        fy_end_date,
        original_due_date,
        address_line_1,
        address_line_2,
        p_o__box,
        city,
        state,
        zip_code,
        zip_code_ext,
        region,
        url,
        fta_recipient_id,
        ueid,
        service_area_sq_miles,
        service_area_pop,
        primary_uza_uace_code AS primary_uza_code,
        uza_name AS primary_uza_name,
        tribal_area_name,
        population,
        density,
        sq_miles,
        voms_do,
        voms_pt,
        total_voms,
        volunteer_drivers,
        personal_vehicles,
        tam_tier,
        number_of_state_counties,
        number_of_counties_with_service,
        state_admin_funds_expended,
        -- A row is valid from its own execution_ts; next_ts (the following
        -- execution_ts in the same partition) feeds the end of the range,
        -- with 2099-01-01 standing in when there is no later row.
        execution_ts AS _valid_from,
        {{ make_end_of_valid_range('COALESCE(next_ts, CAST("2099-01-01" AS TIMESTAMP))') }} AS _valid_to,
        next_ts IS NULL AS _is_current,
    FROM agency_info_{{ report_year }}
    {% if not loop.last %}

    UNION ALL

    {% endif %}
    {% endfor %}
)

SELECT * FROM int_ntd__unioned_agency_information
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
-- Combines the 2022 and 2023 NTD contractual relationships staging models
-- into one relation. Both years contribute the same column list; a literal
-- year column records which staging model each row came from.

{# Years are emitted in this order, so the 2022 SELECT stays first in the UNION ALL. #}
{% set report_years = [2022, 2023] %}

WITH contracts_2022 AS (
    SELECT *
    FROM {{ ref('stg_ntd__2022_contractual_relationships') }}
),

contracts_2023 AS (
    SELECT *
    FROM {{ ref('stg_ntd__2023_contractual_relationships') }}
),

int_ntd__unioned_contractual_relationships AS (
    {% for report_year in report_years %}
    SELECT
        {{ report_year }} AS year,
        other_reconciling_item_expenses_incurred_by_the_buyer,
        total_modal_expenses,
        contract_capital_leasing_expenses,
        direct_payment_agency_subsidy,
        months_seller_operated_in_fy,
        primary_feature,
        voms_under_contract,
        service_captured,
        fares_retained_by,
        other_party,
        other_public_assets_provided,
        buyer_supplies_vehicles_to_seller,
        contractee_ntd_id,
        pt_fare_revenues_passenger_fees,
        agency_name,
        tos,
        type_of_contract,
        reporter_contractual_position,
        other_operating_expenses_incurred_by_the_buyer,
        passenger_out_of_pocket_expenses,
        buyer_provides_maintenance_facility_to_seller,
        contractee_operator_name,
        mode,
        reporting_module,
        reporter_type,
        other_public_assets_provided_desc,
        ntd_id,
        dt,
        execution_ts
    FROM contracts_{{ report_year }}
    {% if not loop.last %}

    UNION ALL

    {% endif %}
    {% endfor %}
)

SELECT * FROM int_ntd__unioned_contractual_relationships
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
version: 2

# Properties for the intermediate NTD annual reporting union models.
models:
  - name: int_ntd__unioned_contractual_relationships
    description: >
      Union of the 2022 and 2023 NTD contractual relationships staging
      models. A `year` column identifies which staging model each row
      came from.
  - name: int_ntd__unioned_agency_information
    description: >
      Union of the 2022 and 2023 NTD agency information staging models.
      Adds a surrogate `key`, a `year` column, and the `_valid_from`,
      `_valid_to` and `_is_current` validity columns derived from
      `execution_ts`.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
{{ config(materialized="table") }}

-- Unpivots the active fleet asset inventory time series staging model.
-- Every staging column that is not in carried_columns becomes a row with the
-- original column name in year and its value, cast to int, in total.
-- The final select keeps only the second underscore-delimited piece of that
-- column name as the year.

{# Columns passed through untouched: excluded from the unpivot and re-selected below, in this order. #}
{% set carried_columns = [
    "state",
    "uza_area_sq_miles",
    "ntd_id",
    "legacy_ntd_id",
    "uace_code",
    "last_report_year",
    "mode_status",
    "service",
    "_2023_mode_status",
    "agency_status",
    "uza_population",
    "mode",
    "uza_name",
    "city",
    "census_year",
    "reporting_module",
    "reporter_type",
    "agency_name",
    "dt",
    "execution_ts"
] %}

WITH unpivoted AS (
    {{ dbt_utils.unpivot(
        relation=ref("stg_ntd__asset_inventory_time_series__active_fleet"),
        exclude=carried_columns,
        cast_to="int",
        field_name="year",
        value_name="total"
    ) }}
),

int_ntd__asset_inventory_time_series_active_fleet AS (
    SELECT
        split(year, '_')[offset(1)] AS year,
        total,
        {% for column_name in carried_columns %}
        {{ column_name }}{% if not loop.last %},{% endif %}

        {% endfor %}
    FROM unpivoted
)

SELECT * FROM int_ntd__asset_inventory_time_series_active_fleet
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
{{ config(materialized="table") }}

-- Unpivots the ADA fleet asset inventory time series staging model.
-- Every staging column that is not in carried_columns becomes a row with the
-- original column name in year and its value, cast to int, in total.
-- The final select keeps only the second underscore-delimited piece of that
-- column name as the year.

{# Columns passed through untouched: excluded from the unpivot and re-selected below, in this order. #}
{% set carried_columns = [
    "state",
    "uza_area_sq_miles",
    "ntd_id",
    "legacy_ntd_id",
    "uace_code",
    "last_report_year",
    "mode_status",
    "service",
    "_2023_mode_status",
    "agency_status",
    "uza_population",
    "mode",
    "uza_name",
    "city",
    "census_year",
    "reporting_module",
    "reporter_type",
    "agency_name",
    "dt",
    "execution_ts"
] %}

WITH unpivoted AS (
    {{ dbt_utils.unpivot(
        relation=ref("stg_ntd__asset_inventory_time_series__ada_fleet"),
        exclude=carried_columns,
        cast_to="int",
        field_name="year",
        value_name="total"
    ) }}
),

int_ntd__asset_inventory_time_series_ada_fleet AS (
    SELECT
        split(year, '_')[offset(1)] AS year,
        total,
        {% for column_name in carried_columns %}
        {{ column_name }}{% if not loop.last %},{% endif %}

        {% endfor %}
    FROM unpivoted
)

SELECT * FROM int_ntd__asset_inventory_time_series_ada_fleet

0 commit comments

Comments
 (0)