-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.py
235 lines (179 loc) · 10.8 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import pandas as pd
import os
import datetime
import sys
from sys import exit
from datetime import timedelta
def delete_files(directory = os.getcwd()):
"""
Deletes all the .xlsx files modified in the last one hour within the specified directory.
This will always default to the current directory unless a parameter is specified.
Parameters:
directory (string, Optional): Directory where files should be deleted
Returns:
None
"""
files = os.listdir(directory)
for file in files:
if file.endswith(".xlsx"):
timestamp = os.path.getmtime(file) # File modification check
datestamp = datetime.datetime.fromtimestamp(timestamp) # Convert timestamp into DateTime object
current_time = datetime.datetime.now() # Using now() to get current time
time_difference = current_time - datestamp # Get time delta
if (time_difference.total_seconds() <= 3600): # 3600 seconds in one hour
os.remove(os.path.join(directory, file)) # Delete files
def sheet_exists(excel_filename, sheetname):
"""
Returns TRUE if a sheet exists and FALSE if not.
Returns a boolean value for if a sheet with a certain name exists in a given xlsx file.
Parameters:
excel_filename (string): The excel/xlsx filename without ".xlsx"
sheetname (string): The name of the sheet that is being verified
Returns:
bool: True if the sheet exists, False if not
"""
path = excel_filename + ".xlsx"
try:
sheets = pd.ExcelFile(path).sheet_names
if sheetname in sheets:
return True
return False
except:
return False
def get_length_of_longest_list(lst):
"""Returns (int) the length of the longest sublist contained in lst; lst should be a list-of-lists."""
return max(len(x) for x in lst)
def xlsx(dataframe, excel_filename, sheetname):
"""
Exports a Pandas DataFrame to an existing or new .xlsx file.
Either makes both a new .xlsx file and a new sheet for the dataframe to go into, or only makes a new sheet into an existing .xlsx and adds the dataframe there.
Parameters:
dataframe (DataFrame): a single Pandas DataFrame which should be prepared to be exported to excel
excel_filename (string): the excel/xlsx filename without ".xlsx"
sheetname (string): the name of the sheet in the excel file that will contain the DataFrame
Returns:
None
"""
path = excel_filename + ".xlsx"
try: # First, try to add a new sheet to the current xlsx file if it already exists
with pd.ExcelWriter(path, mode="a") as writer:
dataframe.to_excel(writer, sheet_name=sheetname)
except: # If no xlsx file has yet been created, then make a new one
dataframe.to_excel(path, sheet_name = sheetname)
def abort(userinput):
"""Helper function for the main start() driver function."""
if (userinput == "e"): # Exit
exit()
elif (userinput == "r"): # Restart to re-enter parameters
start()
else: # Continue start() function as normal
return()
def look_for_data(txt_filename, query, deleteNoneType = True):
"""
Exports a Pandas DataFrame to an existing or new .xlsx file.
Either makes both a new .xlsx file and a new sheet for the dataframe to go into, or only makes a new sheet into an existing .xlsx and adds the dataframe there.
Parameters:
txt_filename (string): the .txt filename without ".txt"
query (string): CASE SENSITIVE... the item to search (verified use cases: any test number, any test name, "FAIL" for every failing test, "PASS" for every passing test)
deleteNoneType (bool, Optional): if disabled, entries where data does not exist will hold the string value "None" in the final excel file
Returns:
int: 0 if no data is found, 1 if any instance of relevant data is found
DataFrame: A dataframe that will be exported to excel containing all relevant data that was searched for
"""
allFlag = 0
if query == "ALL": # Use both "PASS" and "FAIL" as queries
allFlag = 1
data = [] # A list-of-lists that will collect data and be used to initialize a large dataframe when ready
numOfColumns = 0 # Number of columns for the future dataframe
columnBase = ["Number", "Site", "Result", "Test Name"] # Every test number has at least these rows, but some have more
check = 0 # Flag to see if test number is found/the data is then successfully exported to xlsx
with open(txt_filename + ".txt") as reader: # Open the file for reading
for line in reader: # Process the file line by line
if (line.isspace() is False): # Ignore completely blank lines
splitline = line.split() # Split each individual line into a list with whitespace as the delimiter
queryPresent = False
if (allFlag == 0):
if (query in splitline):
queryPresent = True
else:
if ("PASS" in splitline or "FAIL" in splitline):
queryPresent = True
if (queryPresent is True): # Check if an EXACT match for query is in the line (i.e. 10000 should return false if checking for 100)
if (check == 0):
check = 1 # Update check to reflect that at least one instance of the data has been found
data.append(splitline) # Each list element in data is going to be a row in the future dataframe
# Leave the loop and file reader; no longer need to process the file, all relevant data is now stored in the data list
numOfColumns = get_length_of_longest_list(data) # Need as many columns as the length of the longest list
extraColumns = numOfColumns - 4 # Number of columns after the columBase of: "Number Site Result TestName"
if (extraColumns > 0): # This means it's going to be more columns, something like: Number Site Result TestName Pin Channel Low Measured High..... etc
for i in range(extraColumns):
columnBase.append(i+1) # The extra columns are going to be titled in the dataframe from 1 to N where N is the number of columns beyond the columnBase of 4 columns
# The reason for this is because it is impossible to know for sure what data these extra columns will contain, as it's variable
# Prepare and return the dataframe and check flag
df = pd.DataFrame(data, columns=columnBase) # Generate the dataframe to export to excel
if (deleteNoneType is True): # Replace the entries of "None" to be blanks
df.fillna("", inplace=True)
return df, check
def start(xlsx_path=None):
"""Driver function; prints necessary prompts to terminal and collects input. The xlsx_path parameter is used if recursively called (export to same xlsx file)."""
linebreak = "--------------------------------------------------------------------------------------------------------------" # Just for formatting to terminal
print("\nFollow the provided prompts. At any point, type e to exit or r to re-enter parameters.")
print("\nPLEASE NOTE if you type e to exit you must restart the .exe file to run the script.")
print(linebreak)
while True: # Prevent searching for a blank file
txt_filename = input("\nEnter the .txt filename to search: ")
abort(txt_filename)
if txt_filename != "" and txt_filename.isspace() is False:
break
print(linebreak)
if (xlsx_path is None): # Prompt for a xlsx filename if not recursively adding another sheet to the same xlsx file
while True: # Prevent exporting to a blank file
xlsx_path = input("\nEnter the .xlsx filename to export to: ")
abort(xlsx_path)
if xlsx_path != "" and xlsx_path.isspace() is False:
break
print(linebreak)
while True: # Do not allow exporting to same sheet; blank/whitespace will still pass this and continue to use the default sheet name
xlsx_sheet = input("\n(Optional, may leave this blank) Enter the sheet name to export to: ")
abort(xlsx_sheet)
if sheet_exists(xlsx_path, xlsx_sheet) is False:
break
print(linebreak)
while True: # Prevent searching for nothing or for whitespace
query = input("\nEnter the term you would like to search for (test number, test name, PASS, FAIL, ALL): ")
abort(query)
if query != "" and query.isspace() is False:
break
print(linebreak)
if (xlsx_sheet.isspace()) or (xlsx_sheet == ""): # Default sheet name/fail safe
xlsx_sheet = query # Set the default sheet name to be the item being searched for
try:
df, check = look_for_data(txt_filename, query) # If check = 1, data has been found successfully
if (check != 1):
retry = input("Item not found. Type y to run again or anything else to exit: ")
if (retry == "y"):
start() # Try again
return() # Exit
else:
print(linebreak)
print("\n Found the following data and exported it to excel: \n")
print(df)
xlsx(df, xlsx_path, xlsx_sheet)
add_more = input("\nWould you like to export anything else? Type y to do so or anything else to exit: ")
if (add_more == "y"):
same_sheet = input("Would you like to export to the same excel file in a new sheet? Type y to do so or anything else to change the target excel sheet to export to: ")
if (same_sheet == "y"):
start(xlsx_path) # Recursively add into the same xlsx file, but in a new sheet
else:
start() # Reprompt for xlsx file target; add to a specified xlsx file
return() # Exit
except:
print("Error. One of the supplied parameters may have been incorrect? Retrying...\n")
delete = input("Do you want to delete any .xlsx files that have been modified so far in the last hour in the current directory? Type YES! (case sensitive) to do so or anything else to continue: ")
if (delete == "YES!"):
delete_files() # Delete any .xlsx files that were modified in the last hour in the current directory
start() # Try again
print("Data Parsing Script: .txt to .xlsx")
print("Please have relevant .txt files in the same directory as this script, or this script will not be able to read them!")
print("If you want to add sheets to an existing .xlsx file or multiple existing .xlsx files, have all relevant .xlsx files in the same directory as well.")
start()