forked from uberspot/OpenTriviaQA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
text2csv.py
76 lines (74 loc) · 2.43 KB
/
text2csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import pandas as pd
import numpy as np
def txt_to_csv(path):
    """
    Parse one trivia text file into a DataFrame of questions.

    A record is only recognised when it directly follows a blank line, and
    is laid out one item per line:

        <3-character prefix><question text>
        <2-character prefix><correct answer>
        <2-character prefix><option>   (up to four; a blank line ends them)

    The generated table looks like:

          | Questions | Correct | A | B | C | D |
        0 | myQ       | X       | a | X | c | d |

    Args:
        path: Path of the text file to read. Undecodable bytes are ignored.

    Returns:
        A DataFrame with the columns Questions, Correct, A, B, C and D.
        Options missing from a record (blank line or end of file reached
        early) are filled with NaN.
    """
    with open(path, errors='ignore', mode="r") as source:
        lines = source.readlines()

    # NOTE(review): a record at the very top of the file (no blank line in
    # front of it) is not picked up -- confirm inputs start with a blank line.
    rows = []
    for i, line in enumerate(lines):
        if line != "\n":
            continue

        # Question, answer and at most four option lines follow the blank.
        record = lines[i + 1:i + 7]
        if len(record) < 2:
            # No question/answer pair left before end of file.
            continue

        question_line, key_line = record[0], record[1]
        # Too short to carry any text after the prefix, or text starts with '#'.
        if len(question_line) < 4 or question_line[3] == '#':
            continue

        options = []
        for option_line in record[2:]:
            if option_line == "\n":
                break
            options.append(option_line.rstrip("\n")[2:])
        # Pad so every row has exactly four option cells.
        options.extend([np.nan] * (4 - len(options)))

        # Build the whole row first so the columns can never get out of step.
        rows.append(
            [question_line.rstrip("\n")[3:], key_line.rstrip("\n")[2:]] + options
        )

    column_names = ("Questions", "Correct", "A", "B", "C", "D")
    bank = {
        name: [row[position] for row in rows]
        for position, name in enumerate(column_names)
    }
    return pd.DataFrame(bank)
def parse_files(sourcePath='/content/drive/MyDrive/Colab Notebooks/Data_trivial/',destination='/content/drive/MyDrive/Colab Notebooks/Data_trivial_csv/'):
    """
    Convert every file found directly in ``sourcePath`` to a CSV file.

    Each file is parsed with ``txt_to_csv`` and written to ``destination``
    under its original name with ``.csv`` appended.

    Requires Python 3.x, NumPy, os and pandas; the default paths match a
    Google Colab drive layout.

    Args:
        sourcePath: Directory holding the text files to convert.
        destination: Directory that receives the CSV files. It is created
            when it does not exist yet.
    """
    # An empty destination means "current directory"; nothing to create then.
    if destination:
        os.makedirs(destination, exist_ok=True)

    for entry in os.listdir(sourcePath):
        # join() works whether or not the directory was given a trailing slash.
        source_file = os.path.join(sourcePath, entry)
        if not os.path.isfile(source_file):
            # Sub-directories cannot be opened as text files.
            continue
        table = txt_to_csv(source_file)
        table.to_csv(os.path.join(destination, entry + '.csv'))
# Prompt for the directories only when executed directly (script or notebook
# cell), so importing this module for its functions does not block on input().
if __name__ == "__main__":
    print(' Input SourcePath and Destination Path to trverse through the files and convert them into csv \n Requirement Python 3.x , Numpy , os , Pandas \n or run this in Google Colab as it is')
    parse_files(sourcePath=input('SourcePath'),destination=input('Destination Path'))