-
Notifications
You must be signed in to change notification settings - Fork 0
/
findCostDepartmentProject.py
69 lines (61 loc) · 2.62 KB
/
findCostDepartmentProject.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from pm4py.objects.log.importer.xes import factory as xes_import_factory
from pm4py.objects.conversion.log import converter as converter
from pandas import DataFrame
from matplotlib import pyplot as plt, patches as pt, lines as l
from math import sqrt
def _firstRowCostPerCase(df):
    """Return one (department, amount) pair per case, in groupby order."""
    pairs = []
    for _, group in df.groupby("(case)_id"):
        # Only the first row of each case is read for both columns.
        firstRow = group.iloc[0]
        department = firstRow["(case)_Permit_OrganizationalEntity"]
        # NOTE(review): cases labelled "UNKNOWN" are kept and grouped under
        # that label; any other value is assumed to contain at least three
        # space-separated tokens, the third one identifying the department
        # -- TODO confirm against the event log.
        if department != "UNKNOWN":
            department = department.split(" ")[2]
        pairs.append((department, float(firstRow["(case)_AdjustedAmount"])))
    return pairs


def _plotAvgCostPerDep(avgPerDep, overallAverage):
    """Save the per-department average bar chart to costPerDepartment.png."""
    positions = range(len(avgPerDep))
    fig, ax = plt.subplots()
    try:
        ax.bar(positions, list(avgPerDep.values()),
               tick_label=list(avgPerDep.keys()))
        # Overall average drawn as a constant dashed reference line.
        ax.plot(positions, [overallAverage] * len(avgPerDep),
                color='red', linestyle='dashed')
        plt.xticks(rotation=90)
        bluePatch = pt.Patch(color="blue", label="average cost per dep")
        redLine = l.Line2D([], [], color="red", linestyle="dashed",
                           label="total average")
        plt.legend(handles=[bluePatch, redLine])
        fig.tight_layout()
        plt.savefig('costPerDepartment.png')
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)


def _plotDeviationPerDep(deviationPerDep):
    """Save the per-department deviation chart to meanSquaredErrorCosts.png."""
    fig, ax = plt.subplots()
    try:
        ax.bar(range(len(deviationPerDep)), list(deviationPerDep.values()),
               tick_label=list(deviationPerDep.keys()))
        plt.xticks(rotation=90)
        # The label text is kept as-is; the plotted value is the square root
        # of the mean squared deviation from the overall average.
        bluePatch = pt.Patch(color="blue", label="mean squared error per dep")
        plt.legend(handles=[bluePatch])
        fig.tight_layout()
        plt.savefig('meanSquaredErrorCosts.png')
    finally:
        # The original never closed this second figure.
        plt.close(fig)


def findAvgCostPerDep(df: DataFrame) -> None:
    """Plot average cost and cost deviation per department.

    For every case (rows grouped by ``"(case)_id"``) the first row's
    ``"(case)_Permit_OrganizationalEntity"`` and ``"(case)_AdjustedAmount"``
    are read. Two bar charts are written to the current working directory:

    * ``costPerDepartment.png`` -- average amount per department, with the
      average over all cases drawn as a dashed red line.
    * ``meanSquaredErrorCosts.png`` -- per department, the square root of the
      mean squared deviation of its case amounts from the overall average.

    Args:
        df: Event data with one row per event and the three columns named
            above.

    Raises:
        ValueError: If ``df`` contains no cases, so no average can be formed.
    """
    cases = _firstRowCostPerCase(df)
    if not cases:
        raise ValueError("cannot compute cost per department: no cases in df")

    overallAverage = sum(amount for _, amount in cases) / len(cases)

    # Dicts keep first-seen order, which fixes the bar order in both charts.
    costSums = {}
    squaredDeviationSums = {}
    caseCounts = {}
    for department, amount in cases:
        costSums[department] = costSums.get(department, 0.0) + amount
        # Accumulate into the deviation totals themselves; the original code
        # mistakenly restarted from the cost totals on every repeat visit.
        squaredDeviationSums[department] = (
            squaredDeviationSums.get(department, 0.0)
            + (amount - overallAverage) ** 2
        )
        caseCounts[department] = caseCounts.get(department, 0) + 1

    avgPerDep = {
        department: costSums[department] / caseCounts[department]
        for department in costSums
    }
    deviationPerDep = {
        department: sqrt(squaredDeviationSums[department] / caseCounts[department])
        for department in squaredDeviationSums
    }

    _plotAvgCostPerDep(avgPerDep, overallAverage)
    _plotDeviationPerDep(deviationPerDep)
if __name__ == '__main__':
    # Script entry point: import the international declarations XES log,
    # convert it with converter.TO_DATA_FRAME, and render the cost charts.
    eventLog = xes_import_factory.apply("logs/InternationalDeclarationsComplete.xes")
    findAvgCostPerDep(
        converter.apply(eventLog, None, converter.TO_DATA_FRAME)
    )