-
Notifications
You must be signed in to change notification settings - Fork 2
/
dataset_confs.py
79 lines (67 loc) · 3.37 KB
/
dataset_confs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
# Per-dataset settings registries. Each dict below starts empty and is keyed
# by dataset name (e.g. "bpic2017"); the dataset sections later in this module
# fill them in.

# Directory that holds the prepared event-log files.
# Alternative locations (kept for reference, currently disabled):
#   /home/mshoush/Desktop/uncertainity/uncer_2/CatBoost_uncer/data
#   ~/phd/code/Mahmoud_PrescriptiveProcessMonitoring/prepared_data
logs_dir = "./prepared_data/"

# Full path of the prepared file for each dataset.
filename = {}

# Names of the core event-log columns.
case_id_col = {}
activity_col = {}
resource_col = {}
timestamp_col = {}

# Outcome (label) column and the values marking each outcome class.
label_col = {}
pos_label = {}
neg_label = {}

# Treatment column and the values marking each treatment arm.
treatment_col = {}
pos_treatment = {}
neg_treatment = {}

# Feature columns, split by dynamic/static and categorical/numeric.
dynamic_cat_cols = {}
static_cat_cols = {}
dynamic_num_cols = {}
static_num_cols = {}
#### BPIC2017 settings ####
# TODO: change file name for pickle
# Dataset name -> prepared CSV file name (joined onto logs_dir below).
bpic2017_dict = {"bpic2017": "prepared_treatment_outcome_bpic2017.csv"}

for dataset, fname in bpic2017_dict.items():
    # Where the prepared log for this dataset lives.
    filename[dataset] = os.path.join(logs_dir, fname)

    # Minimal set of event-log columns.
    case_id_col[dataset] = "Case ID"
    activity_col[dataset] = "Activity"
    resource_col[dataset] = "org:resource"
    timestamp_col[dataset] = "time:timestamp"

    # Outcome column and its two values.
    label_col[dataset] = "label"
    pos_label[dataset] = "deviant"  # positive outcome that will be predicted
    neg_label[dataset] = "regular"  # negative outcome that we don't need to predict

    # Treatment column and its two values.
    treatment_col[dataset] = "treatment"
    pos_treatment[dataset] = "treat"  # do treatment
    neg_treatment[dataset] = "noTreat"  # do not treat

    # Classifier features. The full categorical feature list is the dynamic
    # categorical columns followed by the static categorical columns.
    dynamic_cat_cols[dataset] = [
        "Activity",
        "org:resource",
        "Action",
        "EventOrigin",
        "lifecycle:transition",
        "Accepted",
        "Selected",
    ]
    # Static attributes: no need for predicting them in suffix predictions.
    static_cat_cols[dataset] = [
        "ApplicationType",
        "LoanGoal",
    ]
    dynamic_num_cols[dataset] = [
        "FirstWithdrawalAmount",
        "MonthlyCost",
        "NumberOfTerms",
        "OfferedAmount",
        "CreditScore",
        "timesincelastevent",
        "timesincecasestart",
        "timesincemidnight",
        "event_nr",
        "month",
        "weekday",
        "hour",
        "open_cases",
    ]
    # Static attributes: no need for predicting them in suffix predictions.
    static_num_cols[dataset] = [
        "NumberOfOffers",
        "RequestedAmount",
    ]
#### BPIC2012 settings ####
# Dataset name -> prepared CSV file name (joined onto logs_dir below).
bpic2012_dict = {"bpic2012": "prepared_treatment_outcome_bpic2012.csv"}

for dataset, fname in bpic2012_dict.items():
    # Where the prepared log for this dataset lives (shared logs_dir; a
    # per-dataset "./data/<dataset>/" directory is not used).
    filename[dataset] = os.path.join(logs_dir, fname)

    # Minimal set of event-log columns.
    case_id_col[dataset] = "Case ID"
    activity_col[dataset] = "Activity"
    resource_col[dataset] = "Resource"
    timestamp_col[dataset] = "start_time"

    # Outcome column and its two values.
    label_col[dataset] = "label"
    pos_label[dataset] = "deviant"  # positive outcome that will be predicted
    neg_label[dataset] = "regular"  # negative outcome that we don't need to predict

    # Treatment column and its two values.
    treatment_col[dataset] = "treatment"
    pos_treatment[dataset] = "treat"  # do treatment
    neg_treatment[dataset] = "noTreat"  # do not treat

    # Classifier features. Only two dynamic categorical columns are enabled;
    # 'Action', 'EventOrigin', 'lifecycle:transition', 'Accepted' and
    # 'Selected' are deliberately left out for this dataset.
    dynamic_cat_cols[dataset] = [
        "Activity",
        "Resource",
    ]
    # No static categorical attributes for this dataset.
    static_cat_cols[dataset] = []
    dynamic_num_cols[dataset] = [
        "timesincelastevent",
        "timesincecasestart",
        "timesincemidnight",
        "event_nr",
        "month",
        "weekday",
        "hour",
        "open_cases",
    ]
    # Static attributes: no need for predicting them in suffix predictions.
    # NOTE(review): "NumberOfOffers" is also listed for bpic2017 -- confirm
    # that the prepared bpic2012 CSV actually contains this column.
    static_num_cols[dataset] = [
        "NumberOfOffers",
        "AMOUNT_REQ",
    ]