-
Notifications
You must be signed in to change notification settings - Fork 20
/
example_officer_config.yaml
278 lines (247 loc) · 12.1 KB
/
example_officer_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
################################################
# UChicago DSaPP Police EIS #
# Model experiments #
################################################
# Name of the department:
department_unit: 'pd'
# Set the officer table name.
officer_label_table_name: "pd_label"
schema_feature_blocks: "pd_features"
# production parameters
production_officer_label_table_name: "production_labels"
production_schema_feature_blocks: "production_feature_blocks"
# Determines whether model objects get stored as a pickle in root_path/department_unit/directory
store_model_object: False
# directory for storing matrices
project_path: '/localdisk/triage/'
########################
# Comment fields #
########################
model_comment: ""
batch_comment: ""
test_flag: False
########################
# Type of Experiment #
########################
unit: 'officer' # other options: 'dispatch'
########################
# Temporal parameters #
########################
# Temporal settings for the experiment. Every entry below is a child of
# `temporal_info`; all values except the two dates are lists of duration strings.
temporal_info:
  start_date: '2009-01-01'  # The first day of the data
  end_date: '2016-09-01'  # The last day of the data
  train_size: ['1y', '2y', '3y', '4y', '5y', '6y', '7y']  # ['3y','5y']
  prediction_window: ['1m', '3m', '6m', '9m', '1y']  # prediction window used for officer-level prediction eg: ['1d', '1w', '1m', '6m', '1y']
  update_window: ['1m']  # update window used for updating the model eg: ['1d', '1w', '1m', '6m', '1y']
  features_frequency: ['1m', '2m', '3m', '6m', '9m', '1y']  # months, used for training the model
  test_frequency: ['1d']  # which interval to use for testing
  test_time_ahead: ['0d']  # how many days ahead the same model will be used for scoring and generating evaluations
  officer_past_activity_window: ['1y']  # Include officers that have activity in the x months preceding start time of test/train
  timegated_feature_lookback_duration: ["P1D", "P1W", "P1M", "P1Y", "P5Y"]  # Aggregation time for features
########################
# Labelling Details #
########################
labels:
# Put several values in one list, e.g. ['Major', 'Minor'], for OR conditions;
# for AND conditions, list them as separate entries underneath.
- ['Major']
- ['Sustained']
########################
# Feature selection #
########################
# Names of the feature blocks, one entry per block.
officer_features:
  - 'IncidentsReported'
  - 'IncidentsCompleted'
  - 'OfficerShifts'
  - 'OfficerArrests'
  - 'TrafficStops'
  - 'FieldInterviews'
  - 'Dispatches'
  - 'DemographicNpaArrests'
  - 'OfficerCharacteristics'
  - 'OfficerEmployment'
  - 'OfficerCompliments'
# Iterates through all officer_features blocks leaving X out
leave_out: 0
# Per-block feature switches: feature_blocks -> <block name> -> <feature name>: True/False.
# The nesting matters: several feature names (e.g. TrafficStops, FieldInterviews)
# are identical to their block name and would collide as duplicate keys if flattened.
feature_blocks:
  IncidentsReported:
    SuspensionsOfType: True  # time-gated, The number of suspensions an officer has had
    HoursSuspensionsOfType: True  # time-gated, The number of hours of suspension an officer has had
    InterventionsOfType: True  # time-gated, The number of interventions of each type an officer has had
    AllAllegations: True  # time-gated, Number of allegations made against an officer
    IncidentsOfType: True  # time-gated, The number of reported incidents of certain type
    IncidentsOfTypeDep: True  # time-gated, The number of reported incidents of certain type, department specific
    ComplaintsTypeSource: True  # time-gated, Number of complaints by source an officer had
    DaysSinceLastAllegation: True  # The number of days since the last allegation was made against the officer.
  IncidentsCompleted:
    IncidentsByOutcome: True  # time-gated, The number of incidents by type of outcome
    IncidentsOfTypeOutSust: True
    IncidentsOfTypeUnSust: True
    IncidentsOfTypeUnknown: True
    IncidentsOfTypeDepOutSust: True
    IncidentsOfTypeDepUnSust: True
    IncidentsOfTypeDepUnknown: True
    DaysSinceLastCompletedAllegation: True  # time-gated, The number of days since the last sustained allegation was made against the officer
    ComplaintsTypeSourceOutSustained: True  # time-gated, The number of sustained complaints by source (internal, external, unknown)
    ComplaintsTypeSourceOutUnSustained: True  # time-gated, The number of unsustained complaints by source (internal, external, unknown)
    # NOTE(review): the original comment said "sustained" here; judging by the key name
    # this is presumably the unknown-outcome count - confirm.
    ComplaintsTypeSourceOutUnknown: True  # time-gated, The number of complaints with unknown outcome by source (internal, external, unknown)
  OfficerCompliments:
    Compliments: True  # time-gated, Number of Compliments an officer received
  OfficerShifts:
    ShiftsOfType: True  # time-gated, The Number of time-gated shifts by categorical type
    HoursPerShift: True  # time-gated, The Average number of hours per shift
  OfficerArrests:
    ArrestMonthlyVariance: True  # time-gated, month-by-month variance of arrest counts.
    ArrestMonthlyCOV: False  # time-gated, month-by-month coefficient of variation in arrest counts. TODO: in a collate branch
    Arrests: True  # time-gated, Number of arrests made by an officer
    ArrestsOfType: True  # Number of time-gated arrests by categorical type.
    ArrestsON: True  # Number of time-gated arrests by day of week.
    SuspectsArrestedOfRace: True  # Number of suspects arrested by race type, time-gated periods.
    SuspectsArrestedOfEthnicity: True  # Number of suspects arrested by ethnicity type, time-gated periods.
    ArrestsCrimeType: True  # time-gated, the number of crimes by ucr4 grouped
  TrafficStops:
    TrafficStopsWithSearch: True  # time gated
    TrafficStopsWithUseOfForce: True  # time gated
    TrafficStops: True  # time gated
    TrafficStopsWithArrest: True  # time gated
    TrafficStopsWithInjury: True  # time gated
    TrafficStopsWithOfficerInjury: True  # time gated
    TrafficStopsWithSearchRequest: True  # time gated
    TrafficStopsByRace: True  # time gated categorical by stopped person's race
    TrafficStopsByStopType: True  # time gated categorical
    TrafficStopsByStopResult: True  # time gated categorical
    TrafficStopsBySearchReason: True
    TrafficStopsByInterestingSearch: False  # TODO: create text features
  FieldInterviews:
    FieldInterviews: True
    HourOfFieldInterviews: True
    ModeHourOfFieldInterviews: True
    FieldInterviewsByRace: True
    FieldInterviewsByOutcome: True
    FieldInterviewsWithFlag: True
    InterviewsType: True
  UseOfForce:  # TODO
    UsesOfForceOfType: True  # The number of uses of force by type of force over time-gated periods.
    UnjustifiedUsesOfForceOfType: True  # The number of unjustified uses of force by type over time-gated periods.
    UnjustUOFInterventionsOfType: True  # Number of interventions of type X following an unjustified force, time gated.
    UOFwithSuspectInjury: False  # Number of uses of force by whether the suspect was injured, time gated. TODO
    SuspectInjuryToUOFRatio: False  # Ratio of suspect injuries to uses of force that an officer has, time gated. TODO
  Dispatches:
    DispatchType: True  # Number of dispatches of different type aggregated over time
    DispatchInitiatiationType: True
    DispatchDivision: True
    DispatchMovement: True
  EISAlerts:  # TODO
    EISInterventionsOfType: True
    FractionEISFlagsWithIntervention: False  # TODO
    EISFlagsOfType: True
  OfficerCharacteristics:
    DummyOfficerMarital: False  # The marital status of the officer. The problem here is that it comes without a date, e.g. training in the past as if an officer was always married or divorced.
    DummyOfficerGender: True  # Officer gender code.
    DummyOfficerRace: True  # Officer race code.
    DummyOfficerEthnicity: True  # Officer ethnicity code.
    OfficerAge: True  # Age of officer in years.
    DummyOfficerEducation: True  # Officer education level.
    MilesFromPost: False  # Number of miles to post. TODO
    DummyOfficerMilitary: True  # Whether or not the officer has had military experience
    AcademyScore: True  # Performance score at the police academy.
    DummyOfficerRank: True  # Officer rank.
  OfficerEmployment:
    OutsideEmploymentHours: True  # Extra duty
    OutsideEmploymentIncome: True  # income generated from it
  Other:
    CountUOFwithResistingArrest: True  # Number of uses of force by whether the suspect resisted arrest, time gated
    ResistingArrestToUOFRatio: False  # Ratio of resisting arrest to uses of force that an officer has, time gated.
    ComplaintToArrestRatio: False  # The ratio of complaints per arrest.
    ComplaintsPerHourWorked: False  # The rate of complaints per hour worked.
    UOFtoArrestRatio: True  # Ratio of uses of force per arrest ratio, time gated.
    ComplimentsToComplaintsRatio: False  # Ratio of internal compliments to complaints an officer has
  DemographicNpaArrests:
    Arrests311Call: True
    Arrests311Requests: True
    PopulationDensity: True
    AgeOfResidents: True
    BlackPopulation: True
    HouseholdIncome: True
    EmploymentRate: True
    VacantLandArea: True
    VoterParticipation: True
    AgeOfDeath: True
    HousingDensity: True
    NuisanceViolations: True
    ViolentCrimeRate: True
    PropertyCrimeRate: True
    SidewalkAvailability: True
    Foreclosures: True
    DisorderCallRate: True
########################
# Model selection #
########################
# All model types (each one has a hyperparameter grid under `parameters`):
# model: ['RandomForest', 'RandomForestBagging', 'RandomForestBoosting', 'ExtraTrees',
#         'AdaBoost', 'LogisticRegression', 'SVM', 'GradientBoostingClassifier',
#         'DecisionTreeClassifier', 'SGDClassifier', 'KNeighborsClassifier']
# Model types selected for this experiment:
model: ['RandomForest','ExtraTrees']
# Hyperparameter grids: parameters -> <model name> -> <hyperparameter>: [candidate values].
# Each model's grid must stay nested under its own key; names such as n_estimators,
# max_depth and random_state repeat across models and would overwrite each other
# as duplicate keys if flattened.
# Trailing comments list alternative values that are currently not searched.
parameters:
  RandomForest:
    n_estimators: [10000]  # [50,100, 1000, 10000, 50000]
    max_depth: [5, 10]  # [100, 5, 10, 50]
    max_features: ['log2']  # ['log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2, 5]  # [2, 5, 10]
    random_state: [2193]
    n_jobs: [-1]
  RandomForestBagging:
    n_estimators: [10]  # [25, 50, 100, 1000, 10000]
    max_depth: [5]  # [10, 20, 50, 100]
    max_features: ['sqrt']  # ['log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2]  # [5, 10]
    max_samples: [0.5]  # [1.0]
    bootstrap: [True]
    bootstrap_features: [False]  # [True]
    n_estimators_bag: [10]  # [25, 50, 100, 1000, 10000]
    max_features_bag: [2]  # [4, 8, 16]
    random_state: [2193]
  RandomForestBoosting:
    n_estimators: [100]  # [25, 50, 100, 1000, 10000]
    max_depth: [20]  # [10, 20, 50, 100]
    max_features: [2]  # ['sqrt', 'log2', 2, 4, 8, 16, "auto"]
    criterion: ['gini']  # ['entropy']
    min_samples_split: [2]  # [5, 10]
    algorithm: ['SAMME']  # ['SAMME.R']
    learning_rate: [0.01]  # [0.1, 1, 10, 100]
    n_estimators_boost: [10]  # [25, 50, 100, 1000, 10000]
    random_state: [2193]
  ExtraTrees:
    n_estimators: [10000]  # [ 25, 50, 100, 1000, 10000, 50000, 100000]
    max_depth: [2, 5, 10]  # [5, 10, 50, 100 ]
    max_features: ['log2']  # [4, 8, 16, "auto"]
    criterion: ['gini']  # , 'entropy']
    min_samples_split: [2, 5, 10]  # [2, 5, 10] #, 5, 10]
    random_state: [2193]
    n_jobs: [-1]
  AdaBoost:
    algorithm: ['SAMME', 'SAMME.R']
    n_estimators: [1, 10, 100]  # [1000, 10000]
    learning_rate: [0.01, 0.1, 1, 10, 100]
    random_state: [2193]
  LogisticRegression:
    C: [0.001, 0.01, 1]  # [1, 10]
    # NOTE(review): if this maps to scikit-learn's LogisticRegression, 'l1' needs a
    # solver that supports it (e.g. liblinear/saga) - confirm against the consumer.
    penalty: ['l1', 'l2']
    random_state: [2193]
  SVM:
    C: [0.00001, 0.0001, 0.001, 0.01, 0.1]  # [1, 10]
    kernel: ['linear']
    random_state: [2193]
  GradientBoostingClassifier:
    n_estimators: [1, 10, 100]  # [1000, 10000]
    learning_rate: [0.001, 0.01, 0.05, 0.1, 0.5]
    subsample: [0.1, 0.5, 1.0]
    max_depth: [1, 3, 5, 10, 20]  # [50, 100]
    random_state: [2193]
  DecisionTreeClassifier:
    criterion: ['gini', 'entropy']
    max_depth: [1, 5, 10, 20]  # [50, 100]
    max_features: ['sqrt', 'log2']
    min_samples_split: [2, 5, 10]
    random_state: [2193]
  SGDClassifier:
    # NOTE(review): if this maps to scikit-learn's SGDClassifier, newer releases
    # spell the 'log' loss as 'log_loss' - confirm against the installed version.
    loss: ['log', 'modified_huber']
    penalty: ['l1', 'l2', 'elasticnet']
    random_state: [2193]
  KNeighborsClassifier:
    n_neighbors: [1, 3, 5, 10, 25, 50, 100]
    weights: ['uniform', 'distance']
    algorithm: ['auto', 'kd_tree']
########################
# Parallelization #
########################
# NOTE(review): presumably the number of CPU cores / worker processes to use - confirm against the consumer.
n_cpus: 38