-
Notifications
You must be signed in to change notification settings - Fork 7
/
PostProcess.py
34 lines (32 loc) · 1.03 KB
/
PostProcess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class CorpusPostProcess:
    """Clean raw multi-line text into a list of bare prompt strings.

    For every input line, a fixed set of marker substrings is deleted, a
    leading list number such as ``1. `` is dropped, and surrounding
    whitespace is stripped. Lines that end up empty are discarded.
    """

    # Entry of ``keywords_to_remove`` that requests list-number removal
    # rather than a plain substring deletion.
    _NUMBERING_MARKER = ". "

    def __init__(self) -> None:
        # Applied in this order to every line. All entries except the
        # numbering marker are deleted wherever they occur in the line.
        self.keywords_to_remove = [
            "提问:",
            "Cypher: ",
            "中文: ",
            " **Translation:**",
            "**",
            " -",
            "`",
            ". ",
        ]

    @staticmethod
    def _strip_leading_number(line: str) -> str:
        """Return *line* without a leading ``<digits>. `` list prefix.

        Only a purely numeric prefix is removed. Cutting at the first
        ``". "`` unconditionally would also truncate ordinary sentences
        ("Find nodes. Return them") and abbreviations ("Mr. Smith").
        """
        head, sep, tail = line.partition(". ")
        if sep and head.strip().isdigit():
            return tail
        return line

    def remove_process(self, raw_prompts: str) -> list:
        """Split *raw_prompts* on newlines and clean each line.

        Args:
            raw_prompts: Text containing one candidate prompt per line.

        Returns:
            The cleaned, non-empty lines in their original order.
        """
        output = []
        for prompt in raw_prompts.split("\n"):
            for keyword in self.keywords_to_remove:
                if keyword == self._NUMBERING_MARKER:
                    # Runs at the marker's position in the list, i.e. after
                    # the preceding decorations have already been removed,
                    # so "**1. foo**" is reduced to "1. foo" first.
                    prompt = self._strip_leading_number(prompt)
                else:
                    prompt = prompt.replace(keyword, "")
            prompt = prompt.strip()
            if prompt:
                output.append(prompt)
        return output

    def process(self, raw_prompts: str) -> list:
        """Run the full post-processing pipeline on *raw_prompts*.

        Currently this is only the keyword/numbering clean-up performed by
        ``remove_process``.
        """
        return self.remove_process(raw_prompts)