-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
67 lines (59 loc) · 1.97 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import numpy
from sklearn import preprocessing
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
# Train an AdaBoost and a random-forest classifier on train.csv, combine them
# in a hard-voting ensemble, and write the ensemble's predictions for test.csv.
dataset = pd.read_csv("train.csv")
# Disabled experiment: label-encode the 'skills' and 'tag' training columns.
# pre = preprocessing.LabelEncoder()
# pre.fit(dataset['skills'])
# dataset['skills'] = pre.transform(dataset['skills'])
# pre.fit(dataset['tag'])
# dataset['tag'] = pre.transform(dataset['tag'])
test = pd.read_csv("test.csv")
# Disabled experiment: label-encode the same columns of the test set.
# NOTE(review): this refits the encoder on the test data, so the integer codes
# may not match the ones assigned on the training data - confirm before
# re-enabling.
# pre.fit(test['skills'])
# test['skills'] = pre.transform(test['skills'])
# pre.fit(test['tag'])
# test['tag'] = pre.transform(test['tag'])

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dataset)
print(test)

# Feature columns used for training; 'solved_status' is the target.
col = ['user_id', 'problem_id', 'accuracy', 'error_count', 'solved_count_y',
       'solved_count_x', 'attempts', 'level', 'rating', 'skills', 'tag',
       'user_type']
x = dataset[col]
y = dataset['solved_status']
# Select the same columns, in the same order, from the test set so that the
# models are scored on exactly the features they were fitted on.
test_x = test[col]

# Disabled experiment: bagging over random forests.
# bagging=BaggingClassifier(RandomForestClassifier(),max_samples=0.5,max_features=0.5)
# bagging.fit(x,y)
# predict=bagging.predict(test)
# predict1=pd.DataFrame(predict)
# predict1.to_csv("/home/flash/Pictures/india_hacks_ml/will_bill_solve_it/new_sub.csv")

# Model 1: AdaBoost. The standalone fit/predict only feeds the optional
# per-model output below; the voting ensemble refits its own copy.
clf1 = AdaBoostClassifier(n_estimators=1000)
clf1.fit(x, y)
predict1 = clf1.predict(test_x)
predict1 = pd.DataFrame(predict1)
#predict1.to_csv("ada.csv")
print("Model 1 ")

# Model 2: random forest. min_samples_split must be an integer >= 2 in current
# scikit-learn; 2 is the smallest size at which a node can be split, so it does
# not restrict tree growth any further than the previous value of 1 intended.
clf2 = RandomForestClassifier(n_estimators=500, max_features=5, max_depth=None,
                              min_samples_split=2, bootstrap=True, n_jobs=-1)
clf2.fit(x, y)
predict2 = clf2.predict(test_x)
predict2 = pd.DataFrame(predict2)
#predict2.to_csv("rf.csv")
print("Model 2 ")

# Final model: majority ("hard") vote over the two classifiers above.
clf = VotingClassifier(estimators=[('ada', clf1), ('rf', clf2)], voting='hard')
clf.fit(x, y)
predict = clf.predict(test_x)
predict = pd.DataFrame(predict)
# The output name previously lacked the dot ("subcsv"); every other output in
# this script is a .csv file.
predict.to_csv("sub.csv")