-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtranslate-lindat.py
executable file
·59 lines (46 loc) · 1.94 KB
/
translate-lindat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
# Credit for this script goes to Ondra Dušek.
# Dominik updated it.
from argparse import ArgumentParser
import requests # pip install requests
import logging
import sys
# Module logger: INFO and above, timestamped, written to stderr.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(
    logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s')
)
logger.addHandler(handler)

# Target language code; it is substituted into the translation model URL.
TARGET = "cs"
# Alternative targets (uncomment exactly one assignment):
# TARGET = "hi"
# TARGET = "fr"
# Russian doesn't work for some reason...
# TARGET = "ru"
def _batched_lines(in_f, batch_size):
    """Yield ``(last_line_no, lines)`` batches of at most *batch_size* lines.

    Lines are yielded without their trailing newline. The final batch may be
    shorter than *batch_size*; it is still yielded so no input is dropped.
    """
    batch = []
    line_no = -1
    for line_no, text in enumerate(in_f):
        # rstrip instead of text[:-1]: the last line of a file may have no
        # trailing newline, and slicing would then cut off a real character.
        batch.append(text.rstrip("\n"))
        if len(batch) >= batch_size:
            yield line_no, batch
            batch = []
    if batch:
        yield line_no, batch


def _translate_batch(texts):
    """Send *texts* to the LINDAT en->TARGET model in a single POST request.

    Returns the translated text as one string, or ``None`` when the service
    answers with a status code other than 200 (the code is logged).
    """
    r = requests.post(
        'https://lindat.mff.cuni.cz/services/translation/api/v2/models/en-%s?tgt=%s&src=en' % (TARGET, TARGET),
        headers={"accept": "application/json"},
        data={'input_text': "\n".join(texts)},
    )
    if r.status_code != 200:
        logger.warning('!!Translation status code: %d', r.status_code)
        return None
    # The JSON body is iterated as a sequence of strings (presumably one
    # translated segment each -- confirm against the API docs). Segments
    # that do not end a line are separated by a space; endswith() is used
    # so an empty segment cannot raise IndexError.
    return "".join(s if s.endswith("\n") else s + " " for s in r.json())


def process_file(in_file, out_file, batch_size=10):
    """Translate *in_file* batch by batch and write the result to *out_file*.

    The input is read as UTF-8, grouped into batches of *batch_size* lines
    (including a shorter final batch), and each batch is translated with one
    request. Each successful translation is written and flushed immediately
    so partial work survives an interruption. A batch whose request fails is
    logged and skipped; nothing is written for it.

    Args:
        in_file: Path of the UTF-8 text file to translate.
        out_file: Path of the output file (overwritten, UTF-8).
        batch_size: Number of input lines sent per request (default 10).

    Raises:
        ValueError: If *batch_size* is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Both files are managed by `with` so they are closed even if a request
    # or a write raises part-way through.
    with open(in_file, "r", encoding='UTF-8') as in_f, \
            open(out_file, 'w', encoding='UTF-8') as out_f:
        for line_no, texts in _batched_lines(in_f, batch_size):
            logger.debug('Translating batch: %r', texts)
            transl = _translate_batch(texts)
            if transl is None:
                # Skip the batch instead of re-writing a stale translation
                # from the previous batch (or hitting an undefined variable).
                continue
            logger.info('OK: %s\n -> %s', "\n".join(texts), transl)
            logger.info("Line %d -- saving", line_no)
            # Save partial work after every request.
            out_f.write(transl)
            out_f.flush()
if __name__ == '__main__':
    # Command-line entry point: two positional paths, input then output.
    parser = ArgumentParser()
    parser.add_argument('in_file', type=str, help='File to translate')
    parser.add_argument('out_file', type=str, help='Translated output')
    cli_args = parser.parse_args()
    process_file(cli_args.in_file, cli_args.out_file)