Skip to content

Commit

Permalink
substituted nan with means
Browse files Browse the repository at this point in the history
  • Loading branch information
Coerulatus committed May 8, 2024
1 parent 81a098a commit f2b6a5c
Showing 1 changed file with 17 additions and 7 deletions.
24 changes: 17 additions & 7 deletions topobenchmarkx/io/load/us_county_demos.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,17 @@ def load_us_county_demos(path, year=2012, y_col="Election"):
"BachelorRate",
"UnemploymentRate",
]
# Drop rows with missing values

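# Select columns, replace ',' with '.' in MedianIncome and convert to numeric.
# NOTE(review): the earlier MedianIncome conversion stripped ',' entirely
# (x.replace(",", "")), i.e. treated it as a thousands separator; replacing it
# with '.' changes the parsed magnitude -- confirm which convention the data uses.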
stat = stat.loc[:, keep_cols]
stat["MedianIncome"] = stat["MedianIncome"].replace(',','.', regex=True)
stat = stat.apply(pd.to_numeric, errors='coerce')

# Substitute NaN values with the column mean (the FIPS column is skipped)
for column in stat.columns:
if column != "FIPS":
mean_value = stat[column].mean()
stat[column].fillna(mean_value, inplace=True)
stat = stat[keep_cols].dropna()

# Delete edges that are not present in stat df
Expand Down Expand Up @@ -103,12 +113,12 @@ def load_us_county_demos(path, year=2012, y_col="Election"):

x_col = list(set(stat.columns).difference(set([y_col])))

stat["MedianIncome"] = (
stat["MedianIncome"]
.apply(lambda x: x.replace(",", ""))
.to_numpy()
.astype(float)
)
# stat["MedianIncome"] = (
# stat["MedianIncome"]
# .apply(lambda x: x.replace(",", ""))
# .to_numpy()
# .astype(float)
# )

x = torch.tensor(stat[x_col].to_numpy(), dtype=torch.float32)
y = torch.tensor(stat[y_col].to_numpy(), dtype=torch.float32)
Expand Down

0 comments on commit f2b6a5c

Please sign in to comment.