-
Notifications
You must be signed in to change notification settings - Fork 0
/
configure.py
39 lines (30 loc) · 1.19 KB
/
configure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import csv
def _iter_sentences(rows):
    """Yield ``(tokens, tags)`` list pairs, one per sentence, from CSV rows.

    A row with fewer than three fields (including a blank line, which
    ``csv.reader`` yields as ``[]``) ends the current sentence. Any other row
    contributes ``row[1]`` as a token and ``row[2]`` as its tag; the first
    column is ignored. Runs of consecutive separator rows produce no empty
    sentences.
    """
    tokens, tags = [], []
    for row in rows:
        if len(row) >= 3:
            tokens.append(row[1])
            tags.append(row[2])
        elif tokens:
            yield tokens, tags
            tokens, tags = [], []
    # The last sentence has no trailing separator row when the file ends
    # directly after a token row.
    if tokens:
        yield tokens, tags


def convert_data(file_path, out_path, *, encoding=None):
    """Group a token-per-row CSV into one output row per sentence.

    The first row of ``file_path`` is treated as a header and skipped. The
    remaining rows are grouped into sentences (see ``_iter_sentences``) and
    written to ``out_path`` under the header ``tokens,labels``. Each output
    cell holds the ``str()`` form of a Python list (e.g. ``['a', 'b']``),
    because the lists are passed to ``csv.writer`` as-is.

    Args:
        file_path: Path of the input CSV (at least three columns per token
            row; column 1 is the token, column 2 is the tag).
        out_path: Path of the CSV to create or overwrite.
        encoding: Text encoding used for both files. ``None`` (the default)
            keeps the platform default encoding.

    Returns:
        A ``(token_docs, tag_docs)`` tuple of parallel lists, where
        ``token_docs[i]`` and ``tag_docs[i]`` are the tokens and tags of the
        i-th sentence.
    """
    token_docs = []
    tag_docs = []
    # newline='' on BOTH files is what the csv module requires: without it,
    # universal-newline translation alters line breaks embedded in quoted
    # fields on input.
    with open(file_path, 'r', newline='', encoding=encoding) as csv_file, \
            open(out_path, 'w', newline='', encoding=encoding) as output_file:
        reader = csv.reader(csv_file)
        writer = csv.writer(output_file)
        next(reader, None)  # skip the input header; tolerate an empty file
        writer.writerow(['tokens', 'labels'])
        for tokens, tags in _iter_sentences(reader):
            token_docs.append(tokens)
            tag_docs.append(tags)
            writer.writerow([tokens, tags])
    return token_docs, tag_docs
# Run the conversion for each (input, output) pair, in order:
# train.csv first, then validation.csv.
for source_csv, target_csv in (
    ('train.csv', 'transformed_train.csv'),
    ('validation.csv', 'transformed_val.csv'),
):
    convert_data(source_csv, target_csv)