-
Notifications
You must be signed in to change notification settings - Fork 0
/
Regressor.py
25 lines (22 loc) · 953 Bytes
/
Regressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import numpy as np
import pandas as pd
import lightgbm
from lightgbm import LGBMRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_log_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
# Baseline price regressor for the Mercari training data: load the TSV, fill
# missing text fields, fit a LightGBM model on a random 70/30 split and print
# the RMSLE measured on the held-out 30%.
df = pd.read_csv("/Volumes/Data/Learning_Resources/Kaggle/mercari-train.tsv", sep="\t")

# Fill missing text fields in one call on the frame itself.
# The previous `df[col].fillna(..., inplace=True)` form mutates an intermediate
# Series: pandas warns about it and, with copy-on-write enabled, `df` is left
# unchanged.
df = df.fillna({
    'brand_name': 'Not Available',
    'category_name': 'Not Available',
    'item_description': 'Not Available',
})

# Only `item_condition_id` and `shipping` are used as features; the text
# columns filled above are not fed to the model.
X = df[['item_condition_id', 'shipping']]
Y = df['price']

# Fixed seed so the reported score is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0
)

# Alternative models that can be swapped in for comparison:
# clf = RandomForestRegressor(n_jobs=-1,min_samples_leaf=3,n_estimators=200)
# clf = DecisionTreeRegressor(random_state=0)
clf = LGBMRegressor()
clf.fit(X_train, y_train)

# mean_squared_log_error rejects invalid negative inputs, and nothing constrains
# the regressor's output to be non-negative, so clip predictions at zero
# before scoring.
pred = np.clip(clf.predict(X_test), 0, None)
rmsle = np.sqrt(mean_squared_log_error(y_test, pred))
print(rmsle)