-
Notifications
You must be signed in to change notification settings - Fork 0
/
dt.py
83 lines (75 loc) · 3.46 KB
/
dt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from sklearn import tree
import preprocess
import pandas as pd
import numpy as np
class DecisionTree():
    """Decision-tree classifier that predicts the "Yards" column of a play table.

    The constructor reads the training CSV, keeps only the rows whose
    "NflId" equals "NflIdRusher", drops a fixed set of columns and passes a
    fixed set of columns through ``PreprocessUtil.discretizeValues``.

    NOTE(review): ``trainModel`` fits on the first rows of the frame and
    ``predict`` scores the last rows. If the frame has fewer rows than the two
    slice sizes combined, the slices overlap and the reported accuracy is
    measured partly on training rows.
    """

    # Columns removed before training. The original list named
    # "PlayerBirthDate" twice; each column is listed once here so a helper
    # that drops one column at a time cannot fail on the repeat.
    _DROPPED_COLUMNS = (
        "Orientation", "DisplayName", "JerseyNumber", "VisitorScoreBeforePlay",
        "HomeScoreBeforePlay", "PlayerBirthDate", "VisitorTeamAbbr",
        "HomeTeamAbbr", "Stadium", "Team", "TimeHandoff", "TimeSnap",
        "GameClock", "PossessionTeam", "PlayerCollegeName", "Location",
        "FieldPosition", "DefensePersonnel", "PlayDirection", "Position",
    )

    # Columns handed to discretizeValues, in the order they are processed.
    _DISCRETIZED_COLUMNS = (
        "OffenseFormation", "OffensePersonnel", "WindDirection", "Turf",
        "PlayerHeight", "StadiumType", "GameWeather", "DefendersInTheBox",
        "WindSpeed",
    )

    # Name of the label column and the value used to fill missing features.
    _LABEL_COLUMN = "Yards"
    _MISSING_FILL = 1.0

    #Constructor
    def __init__(self, trainPath, testPath):
        """Load the training CSV and preprocess it.

        Args:
            trainPath: Path of the CSV file read with ``pd.read_csv``.
            testPath: Stored on the instance; not read anywhere in this class.
        """
        self.preprocessUtil = preprocess.PreprocessUtil()
        self.trainPath = trainPath
        self.testPath = testPath
        self.dataFrame = pd.read_csv(self.trainPath)
        self.model = tree.DecisionTreeClassifier()
        self.parseData()

    #Parse Data
    def parseData(self):
        """Filter, prune and discretize ``self.dataFrame`` in place."""
        # Keep only the rows where the player is the rusher of the play.
        isRusher = self.dataFrame["NflIdRusher"] == self.dataFrame["NflId"]
        self.dataFrame = self.dataFrame[isRusher]

        self.dataFrame = self.preprocessUtil.dropColumns(
            self.dataFrame, list(self._DROPPED_COLUMNS))

        # presumably discretizeValues maps a column's values to discrete
        # codes; its implementation lives in the preprocess module.
        for column in self._DISCRETIZED_COLUMNS:
            self.dataFrame = self.preprocessUtil.discretizeValues(
                self.dataFrame, column)

    def _toVectorsAndLabels(self, frame):
        """Split *frame* into (feature matrix, label array).

        The label column is removed from the features and missing feature
        values are replaced with ``_MISSING_FILL``.
        """
        labels = frame[self._LABEL_COLUMN].values
        features = frame.drop([self._LABEL_COLUMN], axis=1)
        vectors = features.fillna(value=self._MISSING_FILL).values
        return vectors, labels

    #Train model
    def trainModel(self, numRows=20000):
        """Fit the model on the first *numRows* rows of the frame.

        Args:
            numRows: Number of leading rows used for training (default 20000,
                the value that was previously hard-coded).
        """
        trainFrame = self.dataFrame.head(n=numRows)
        trainVectors, trainLabels = self._toVectorsAndLabels(trainFrame)
        self.model.fit(trainVectors, trainLabels)

    #Predict
    def predict(self, numRows=3000, tolerance=3.0):
        """Score the model on the last *numRows* rows and print the accuracy.

        A prediction counts as correct when it is within *tolerance* of the
        true "Yards" value.

        Args:
            numRows: Number of trailing rows used for testing (default 3000).
            tolerance: Maximum absolute error still counted as correct
                (default 3.0).

        Returns:
            The accuracy as a percentage, or ``None`` when there are no test
            rows (that case previously failed instead of reporting).
        """
        testFrame = self.dataFrame.tail(n=numRows)
        if len(testFrame) == 0:
            # Nothing to score: avoid calling the model on an empty matrix
            # and avoid dividing by zero below.
            print("Test Accuracy: n/a (no test rows)")
            return None

        testVectors, testLabels = self._toVectorsAndLabels(testFrame)
        predictions = self.model.predict(testVectors)

        absoluteErrors = np.abs(np.asarray(testLabels) - np.asarray(predictions))
        numCorrect = int(np.count_nonzero(absoluteErrors <= tolerance))
        accuracy = float(numCorrect) / len(predictions) * 100
        print("Test Accuracy: %.2f%%" % accuracy)
        return accuracy
if __name__ == "__main__":
    # Script entry point: build the classifier from train.csv (no separate
    # test file is passed), fit it, then print the test accuracy.
    decisionTreeModel = DecisionTree("train.csv", None)
    decisionTreeModel.trainModel()
    decisionTreeModel.predict()