Skip to content

Commit

Permalink
fix bug with choose model, add more model metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
pierrelefevre committed Jan 5, 2024
1 parent d67aa7c commit 79db3b9
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 13 deletions.
21 changes: 19 additions & 2 deletions api/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,26 @@ def load_models():

def choose_model(transformed_params):
    """Return the most recently trained model that fits the request.

    Two families of models are distinguished by a marker in their name:
    ``with-askingPrice`` and ``without-askingPrice``. The family is picked
    by whether ``"askingPrice"`` is a key of ``transformed_params``; within
    that family the model with the greatest ``metadata["trainedAt"]`` value
    is returned.

    Args:
        transformed_params: Mapping (or other container) of request
            parameters; only membership of ``"askingPrice"`` is checked.

    Returns:
        The entry from the module-level ``models`` mapping with the latest
        ``trainedAt`` among those whose name contains the family marker.

    Raises:
        IndexError: If no loaded model name contains the family marker.
        KeyError: If a candidate lacks ``metadata`` or ``trainedAt``.
    """
    if "askingPrice" in transformed_params:
        marker = "with-askingPrice"
    else:
        marker = "without-askingPrice"

    candidates = [model for name, model in models.items() if marker in name]
    if not candidates:
        # Same exception type as indexing an empty list, but with a message
        # that says which model family is missing.
        raise IndexError(f"no loaded model whose name contains '{marker}'")

    # sorted() is stable, so among equal trainedAt values the model that
    # appears last in `models` wins, matching sort-in-place followed by [-1].
    # NOTE(review): assumes trainedAt values order chronologically when
    # compared directly (e.g. ISO-8601 strings) - confirm against load_models.
    return sorted(candidates, key=lambda m: m["metadata"]["trainedAt"])[-1]


def get_live_listings(page: int, page_size: int):
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,36 @@
{"name": "bostadspriser-with-askingPrice", "nameWithDate": "bostadspriser-with-askingPrice-2024-01-05", "features": ["fee", "livingArea", "rooms", "runningCosts", "hasElevator", "hasBalcony", "lat", "long", "cpi", "hasHousingCooperative", "isPlot", "isWinterLeisureHouse", "isApartment", "isFarmWithForest", "isHouse", "isRowHouse", "isPairHouse", "isPairTerracedRowHouse", "isTerracedHouse", "isFarmWithoutForest", "isLeisureHouse", "isOtherHousingForm", "isFarmWithAgriculture", "age", "sinceLastRenovation", "soldYear", "soldMonth", "askingPrice"], "target": "finalPrice", "trainedAt": "2024-01-05T15:01:59.314123"}
{
"name": "bostadspriser-with-askingPrice",
"nameWithDate": "bostadspriser-with-askingPrice-2024-01-05",
"features": [
"fee",
"livingArea",
"rooms",
"runningCosts",
"hasElevator",
"hasBalcony",
"lat",
"long",
"cpi",
"hasHousingCooperative",
"isPlot",
"isWinterLeisureHouse",
"isApartment",
"isFarmWithForest",
"isHouse",
"isRowHouse",
"isPairHouse",
"isPairTerracedRowHouse",
"isTerracedHouse",
"isFarmWithoutForest",
"isLeisureHouse",
"isOtherHousingForm",
"isFarmWithAgriculture",
"age",
"sinceLastRenovation",
"soldYear",
"soldMonth",
"askingPrice"
],
"target": "finalPrice",
"trainedAt": "2024-01-05T15:01:59.314123"
}
15 changes: 11 additions & 4 deletions model/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,14 @@ def evaluate_model(
r2 = r2_score(y_test, y_pred)

print(f"[{name}] MSE: " + str(mse) + ", RMSE: " + str(rmse) + ", R^2: " + str(r2))
return gs_model, mse, rmse, r2
return gs_model, gs_model.best_params_, mse, rmse, r2


# train: takes a train/test split, evaluates every configured regressor, and returns a dict of per-regressor results (fitted model plus its metrics)
def train(name, X_train, y_train, X_test, y_test):
results = {}
for regressor_name, regressor in regressors.items():
model, mse, rmse, r2 = evaluate_model(
model, best_params, mse, rmse, r2 = evaluate_model(
name,
regressor_name,
regressor["model"],
Expand All @@ -181,7 +181,7 @@ def train(name, X_train, y_train, X_test, y_test):
X_test,
y_test,
)
results[regressor_name] = {"model": model, "MSE": mse, "RMSE": rmse, "R^2": r2}
results[regressor_name] = {"model": model, "bestParams": best_params, "mse": mse, "rmse": rmse, "r2": r2}

return results

Expand Down Expand Up @@ -224,7 +224,7 @@ def main():
results_df = pd.DataFrame(results)
results_df = results_df.drop("model", axis=0)
results_df = results_df.transpose()
results_df = results_df.sort_values(by=["RMSE"])
results_df = results_df.sort_values(by=["rmse"])
best_model = results_df.index[0]
print(f"[{name}] Best model: " + best_model)

Expand Down Expand Up @@ -252,6 +252,13 @@ def main():
"features": setup["features"],
"target": setup["target"],
"trainedAt": now.isoformat(),
"model": {
"name": best_model,
"params": results[best_model]["bestParams"],
"mse": results[best_model]["mse"],
"rmse": results[best_model]["rmse"],
"r2": results[best_model]["r2"],
}
}
with open(f"{folder}/metadata.json", "w") as f:
json.dump(metadata, f)
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "bostadspriser-with-askingPrice", "nameWithDate": "bostadspriser-with-askingPrice-2024-01-05", "features": ["fee", "livingArea", "rooms", "runningCosts", "hasElevator", "hasBalcony", "lat", "long", "cpi", "hasHousingCooperative", "isPlot", "isWinterLeisureHouse", "isApartment", "isFarmWithForest", "isHouse", "isRowHouse", "isPairHouse", "isPairTerracedRowHouse", "isTerracedHouse", "isFarmWithoutForest", "isLeisureHouse", "isOtherHousingForm", "isFarmWithAgriculture", "age", "sinceLastRenovation", "soldYear", "soldMonth", "askingPrice"], "target": "finalPrice", "trainedAt": "2024-01-05T15:01:59.314123"}
{"name": "bostadspriser-with-askingPrice", "nameWithDate": "bostadspriser-with-askingPrice-2024-01-05", "features": ["fee", "livingArea", "rooms", "runningCosts", "hasElevator", "hasBalcony", "lat", "long", "cpi", "hasHousingCooperative", "isPlot", "isWinterLeisureHouse", "isApartment", "isFarmWithForest", "isHouse", "isRowHouse", "isPairHouse", "isPairTerracedRowHouse", "isTerracedHouse", "isFarmWithoutForest", "isLeisureHouse", "isOtherHousingForm", "isFarmWithAgriculture", "age", "sinceLastRenovation", "soldYear", "soldMonth", "askingPrice"], "target": "finalPrice", "trainedAt": "2024-01-05T15:10:07.275350", "model": {"name": "Linear Regression", "params": {}, "mse": 114645181342.62703, "rmse": 338592.94343300635, "r2": 0.9751360245953378}}
Binary file modified models/bostadspriser-with-askingPrice-2024-01-05/model.pkl
Binary file not shown.
4 changes: 2 additions & 2 deletions models/bostadspriser-with-askingPrice-2024-01-05/results.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
,MSE,RMSE,R^2
Linear Regression,114645181342.62703,338592.94343300635,0.9751360245953378
,bestParams,mse,r2,rmse
Linear Regression,{},114645181342.62703,0.9751360245953378,338592.94343300635
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "bostadspriser-without-askingPrice", "nameWithDate": "bostadspriser-without-askingPrice-2024-01-05", "features": ["fee", "livingArea", "rooms", "runningCosts", "hasElevator", "hasBalcony", "lat", "long", "cpi", "hasHousingCooperative", "isPlot", "isWinterLeisureHouse", "isApartment", "isFarmWithForest", "isHouse", "isRowHouse", "isPairHouse", "isPairTerracedRowHouse", "isTerracedHouse", "isFarmWithoutForest", "isLeisureHouse", "isOtherHousingForm", "isFarmWithAgriculture", "age", "sinceLastRenovation", "soldYear", "soldMonth"], "target": "finalPrice", "trainedAt": "2024-01-05T15:01:52.948934"}
{"name": "bostadspriser-without-askingPrice", "nameWithDate": "bostadspriser-without-askingPrice-2024-01-05", "features": ["fee", "livingArea", "rooms", "runningCosts", "hasElevator", "hasBalcony", "lat", "long", "cpi", "hasHousingCooperative", "isPlot", "isWinterLeisureHouse", "isApartment", "isFarmWithForest", "isHouse", "isRowHouse", "isPairHouse", "isPairTerracedRowHouse", "isTerracedHouse", "isFarmWithoutForest", "isLeisureHouse", "isOtherHousingForm", "isFarmWithAgriculture", "age", "sinceLastRenovation", "soldYear", "soldMonth"], "target": "finalPrice", "trainedAt": "2024-01-05T15:10:01.278979", "model": {"name": "Linear Regression", "params": {}, "mse": 3155263694507.91, "rmse": 1776306.1939057438, "r2": 0.3156938828418222}}
Binary file modified models/bostadspriser-without-askingPrice-2024-01-05/model.pkl
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
,MSE,RMSE,R^2
Linear Regression,3155263694507.91,1776306.1939057438,0.3156938828418222
,bestParams,mse,r2,rmse
Linear Regression,{},3155263694507.91,0.3156938828418222,1776306.1939057438

0 comments on commit 79db3b9

Please sign in to comment.