-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcerberusCheck.py
170 lines (126 loc) · 7.94 KB
/
cerberusCheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def tabulate(filename):
import pandas as pd
import numpy as np
import xlsxwriter as xl
import os
os.system('color')
#import regex as rg
'''
docstring
function: this program reads the Cerberus Excel data & returns a list of lists, each containing (Segment Name, LOH, TTL, LRR)
'''
#filename = r"\\sinsdn38.ap.infineon.com\BE_CLUSTER_PTE\04_Data_Management\09_Intern_Projects\Haikal Yusuf\Weekly LRR Reports\LW2106 Compile.xlsx"
'''
logweek = input("Which logweek would you like to query?")
logweek = int(logweek)
filename = f"\\sinsdn38.ap.infineon.com\BE_CLUSTER_PTE\04_Data_Management\09_Intern_Projects\Haikal Yusuf\Weekly LRR Reports\LW{logweek} Compile.xlsx"
#filename = repr(filename)
filename = r"{}".format(filename)
'''
#print("These are the Cerberus values")
sheets_dict = pd.read_excel(filename, sheet_name=None, skiprows=8)
full_table = pd.DataFrame()
# name is a key, sheet is a value (key-value pair)
# name is a String, sheet is a DataFrame
for name, sheet in sheets_dict.items():
# this adds another column containing the Worksheet name
sheet['sheet'] = name.split("-")[0].strip() + " " + name.split("-")[1].split(" ")[1].strip()
#sheet = sheet.rename(columns=lambda x: x.split('_')[0]) can't split because of column ALF_DISPOSITION
full_table = full_table.append(sheet)
#print('1. unique values in sheet', full_table.sheet.unique())
# filtering of data in LOH, TTL
'''
For 'Hold Comments':
WUXI DS has no filter applied,
WUXI CC retains DDM comments, excludes all http comments
'''
# Parameter violations that DSMAL lots have to adhere to
dsmal_list = ['1WAVIS','2DVIS','2TPVIS','3BOVIS','INTAPE','MBIN1','MISDEV','N.A.','OUTPAD','PADCOV','PADEDG','PADIMM','PNP','PURGE','TWBOTT','TWTOP','VISION_IN_TAPE']
# preserves these columns
full_table = full_table[ ['Owner', 'Hold Comments', 'sheet'] ]
# strip "!" from "Parameter ..." values in 'Hold Comments column (data cleaning in preparation for DSMAL)
full_table['Hold Comments'] = full_table['Hold Comments'].str.strip("!")
# filter for 'Owner' first (TTL count)
full_table = full_table[ full_table['Owner'].isin(['PROD', 'RISK', 'RISM', 'RWIC', 'SFLA']) ]
# filter for 'Hold Comments' (LOH count)
full_table = full_table[ full_table['Hold Comments'].isin(['Configure', 'Lot-Error']) | full_table['Hold Comments'].str.contains('Parameter')
# retains WUXI DS entries
| full_table['sheet'].str.contains('WUXI DS')
# retains WUXI CC entries
| ( full_table['sheet'].str.contains('WUXI CC') & full_table['Hold Comments'].str.contains('DDM') )
# retains DSMAL entries
# filtering for DSMAL entries is done later on
# retains TTL lots
| ( full_table['sheet'].str.contains('DWHView') )
]
# tuples to store each segment's values: LOH, TTL, LRR
dsmal_loh, plt_loh, sens_loh, ts_loh, wuxicc_loh, wuxids_loh, pob_loh = "", "", "", "", "", "", ""
dsmal_ttl, plt_ttl, sens_ttl, ts_ttl, wuxicc_ttl, wuxids_ttl, pob_ttl = "", "", "", "", "", "", ""
dsmal_LRR, plt_LRR, sens_LRR, ts_LRR, wuxicc_LRR, wuxids_LRR, pob_LRR = "", "", "", "", "", "", ""
segment_loh = [ dsmal_loh, plt_loh, sens_loh, ts_loh, wuxicc_loh, wuxids_loh, pob_loh ]
segment_ttl = [ dsmal_ttl, plt_ttl, sens_ttl, ts_ttl, wuxicc_ttl, wuxids_ttl, pob_ttl ]
segment_LRR = [ dsmal_LRR, plt_LRR, sens_LRR, ts_LRR, wuxicc_LRR, wuxids_LRR, pob_LRR ]
segment_tuple = ('DSMAL', 'PLT', 'SENS', 'TS', 'WUXI CC', 'WUXI DS', 'POB')
segment_stats_list = []
# counting of segment's stats
#for loh, ttl, lrr, name in zip(segment_loh, segment_ttl, segment_LRR, segment_tuple): zip doesn't work here because assignment isn't done/allowed in zip
for i, name in enumerate(segment_tuple):
if 'DSMAL' in name:
dsmal_df = full_table.copy(deep=True)
dsmal_df['new'] = dsmal_df['Hold Comments'].str.split(";+|:+")
dsmal_df = dsmal_df[ dsmal_df['sheet'].str.contains('DSMAL') & dsmal_df['sheet'].str.contains('LOH') ]
dsmal_df['new 1'] = [ x[1:] for x in dsmal_df['new'] if len(x) > 1 or x in ['Configure', 'Lot-Error'] ]
k = dsmal_df[ dsmal_df['new 1'].apply(lambda x: set(x).issubset(set(dsmal_list))) ]
dsmal_loh = len(dsmal_df.index) - len(k.index)
import os
os.system('color')
#print("DSMAL LOH value is ", end='')
#print('\x1b[6;30;42m' + str(dsmal_loh) + '\x1b[0m')
segment_loh[i] = dsmal_loh
segment_ttl[i] = len( full_table[ full_table['sheet'].str.contains(name) & full_table['sheet'].str.contains('DWHView') ].index )
segment_LRR[i] = round(dsmal_loh / segment_ttl[i], 5)
continue
if 'SENS' in name:
# need to take into consideration for SENS, since the data for TTL is split into 2 worksheets
ttl1 = len( full_table[ full_table['sheet'].str.contains(name) & full_table['sheet'].str.contains('1') & full_table['sheet'].str.contains('DWHView') ].index )
ttl2 = len( full_table[ full_table['sheet'].str.contains(name) & full_table['sheet'].str.contains('2') & full_table['sheet'].str.contains('DWHView') ].index )
ttl = ttl1 + ttl2
else:
ttl = len( full_table[ full_table['sheet'].str.contains(name) & full_table['sheet'].str.contains('DWHView') ].index )
loh = len( full_table[ full_table['sheet'].str.contains(name) & full_table['sheet'].str.contains('LOH') ].index )
lrr = round(loh / ttl, 5)
#print(f'{name}\'s stats are {loh}, {ttl}, {lrr*100}%')
segment_loh[i] = loh
segment_ttl[i] = ttl
segment_LRR[i] = lrr
#segment_stats_list.append( [loh, ttl, lrr] )
segment_stats = zip(segment_tuple, segment_loh, segment_ttl, segment_LRR)
segment_stats_list = list(segment_stats)
return segment_stats_list
'''
# To set up an Excel workbook to track all segment's values: LOH, TTL & LRR%
'''
#tabulate()
'''
references:
https://stackoverflow.com/questions/41340341/how-to-determine-the-length-of-lists-in-a-pandas-dataframe-column
https://www.kite.com/python/answers/how-to-filter-a-pandas-dataframe-with-a-list-by-%60in%60-or-%60not-in%60-in-python
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.split.html
https://stackoverflow.com/questions/36108377/how-to-use-the-split-function-on-every-row-in-a-dataframe-in-python
https://www.programiz.com/python-programming/list
https://stackoverflow.com/questions/34468983/how-to-check-if-all-elements-in-a-tuple-or-list-are-in-another
https://stackoverflow.com/questions/8113782/split-string-on-whitespace-in-python
https://pandas.pydata.org/pandas-docs/version/0.23.4/text.html
https://medium.com/swlh/3-ways-to-filter-pandas-dataframe-by-column-values-dfb6609b31de
https://www.programiz.com/python-programming/methods/built-in/zip
from friday (30/10)'s session
https://stackoverflow.com/questions/17322109/get-dataframe-row-count-based-on-conditions
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.copy.html
https://stackoverflow.com/questions/20246722/typeerror-object-of-type-float-has-no-len
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.dtypes.html
https://www.programiz.com/python-programming/list-comprehension
https://stackoverflow.com/questions/40646458/list-comprehension-in-pandas
https://www.geeksforgeeks.org/python-retain-list-elements-value-items/
https://towardsdatascience.com/how-to-quickly-create-and-unpack-lists-with-pandas-d0e78e487c75
https://www.kite.com/python/answers/how-to-limit-a-float-to-two-decimal-places-in-python
'''