Skip to content

Commit

Permalink
Fix LA 2023 rows that were getting dropped
Browse files (browse the repository at this point in the history)
  • Loading branch information
sid-kap committed Jul 21, 2024
1 parent 35e663f commit 6b3b104
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion python/housing_data/california_hcd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def load_california_hcd_data(
df = df[df["UNIT_CAT_DESC"] != "Mobile Home Unit"].copy()

df["units"] = df[BUILDING_PERMIT_COLUMNS].sum(axis="columns", numeric_only=True)

df = df[
(df["units"] > 0)
# Exclude rows with a certificate of occupancy, because it's very unlikely
Expand All @@ -45,7 +46,7 @@ def load_california_hcd_data(
# permit anyway.
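# NB: I only looked at LA data to validate this assumption. The data looks
# _way_ more accurate when we drop these rows.
# Exception: rows whose certificate-of-occupancy date equals the building
# permit issue date are kept; dropping them was removing LA 2023 rows.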
& df["CO_ISSUE_DT1"].isnull()
& (df["CO_ISSUE_DT1"].isnull() | ((df["BP_ISSUE_DT1"] == df["CO_ISSUE_DT1"])))
].copy()

df["building_type"] = np.select(
Expand Down

0 comments on commit 6b3b104

Please sign in to comment.