-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
BrianBu47
committed
Jun 18, 2024
1 parent
533bee0
commit 7619477
Showing
4 changed files
with
296 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
from flask import Flask, request, send_file | ||
import main | ||
from datetime import datetime | ||
import json | ||
import redis | ||
|
||
app = Flask(__name__)

# ---------------------------------------------------------------------------
# Configuration phase
# ---------------------------------------------------------------------------

# Load the JSON configuration from the current working directory.
with open("config.json", "r", encoding="utf-8") as f:
    configfile = json.load(f)

# Model pipeline config: build the translation pipeline once at start-up.
main.pipeinit(configfile["Model"])

# Redis config: Redis is an optional translation cache. If it cannot be
# reached, the service keeps running and simply translates every request.
redis_enabled = False
r = None
cache_prefix = None
try:
    r = redis.Redis(
        host=configfile["Redis"]["address"],
        port=configfile["Redis"]["port"],
        password=configfile["Redis"]["password"],
        decode_responses=True,
    )
    cache_prefix = configfile["Redis"]["prefix"]
    # PING checks connectivity without writing a probe key into the store.
    r.ping()
    # Switch the cache on only after the server has actually answered;
    # the request handlers consult this flag before touching ``r``.
    redis_enabled = True
    print("redis initiated")
except Exception as exc:
    # Missing config keys, bad host/port values and connection failures all
    # end up here; report the reason instead of hiding it.
    print("redis not connected", repr(exc))

# Application port config: fall back to 9900 when the config leaves it empty.
port = "9900"
if configfile["Application"]["port"] != "":
    port = configfile["Application"]["port"]
# application from and to are not applicable here
|
||
# basic Translation | ||
def translate_one(data):
    """Translate a single item.

    The item is wrapped in a one-element list, handed to
    ``main.translate_batch`` and the first (only) entry of the returned
    sequence is given back to the caller.
    """
    translated = main.translate_batch([data])
    return translated[0]
''' | ||
controllers | ||
''' | ||
# this works for the xunity autotranslator, the normal bunch | ||
@app.route("/translate", methods=["GET"])
def auto_translate():
    """Translate the ``text`` query parameter and return the translation.

    According to the original author's note this endpoint serves the XUnity
    AutoTranslator client. When the module-level ``redis_enabled`` flag is
    set, the translation is looked up in Redis first (key = ``cache_prefix``
    + text) and stored there after a miss.

    Returns:
        The translated text as the response body, or a ``400`` response when
        the ``text`` parameter is missing.
    """
    data = request.args.get("text")
    print(data)
    if data is None:
        # Without this guard a missing parameter surfaces as a TypeError
        # (HTTP 500) further down.
        return "missing required query parameter: text", 400

    # ``cache_key`` stays None when the cache is disabled.
    cache_key = None
    if redis_enabled:
        cache_key = cache_prefix + data
        rd = r.get(cache_key)
        if rd is not None:
            print("cache hit", data, "=>", rd)
            return rd
        print("cache miss")

    # Cache disabled or cache miss: run the model and report how long it took.
    started = datetime.now()
    result = translate_one(data)
    print("time:", datetime.now() - started)

    if cache_key is None:
        print(result)
    elif r.set(cache_key, result):
        print("cache in", data, "=>", result)
    return result
# this opens the upload interface | ||
@app.route("/translate/upload", methods=["GET"])
def show_upload():
    """Serve the upload page by returning the contents of ``interface.html``.

    The file is read from the current working directory on every request.
    """
    # The page declares charset UTF-8, so decode it as UTF-8 explicitly
    # rather than relying on the platform's default encoding.
    with open("interface.html", "r", encoding="utf-8") as f:
        return f.read()
# this translates json | ||
# NOTE: this handler is synchronous, which could become a problem under load, but there are not enough machines available to make async handling worthwhile
@app.route("/translate/json", methods=["POST"])
def json_translate():
    """Translate every key of the posted JSON object.

    The request body must be a JSON object. Its keys are translated in one
    batch and a new object mapping each original key to its translation is
    written to ``tmp/<timestamp>.json`` and sent back as the response.

    Returns:
        The generated JSON file, or a ``400`` response when the body is not
        a JSON object.
    """
    import os  # local import: only this handler needs it

    payload = request.json
    if not isinstance(payload, dict):
        # Lists, strings, numbers etc. have no keys to translate.
        return "request body must be a JSON object", 400

    keys = list(payload)
    translations = main.translate_batch(keys)
    result = dict(zip(keys, translations))

    # Make sure the output directory exists before writing into it.
    os.makedirs("tmp", exist_ok=True)
    # Microseconds keep two requests within the same second from
    # overwriting each other's file.
    filename = datetime.now().strftime("%y%m%d%H%M%S%f")
    # Absolute path: send_file resolves relative paths against the
    # application root, while open() uses the current working directory.
    path = os.path.abspath(os.path.join("tmp", f"{filename}.json"))
    # ensure_ascii=False emits raw non-ASCII text, so the file must be UTF-8.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2, ensure_ascii=False)
    return send_file(path)
# ---------------------------------------------------------------------------
# Start up the application
# ---------------------------------------------------------------------------
# Only start the development server when this file is run as a script;
# it listens on all interfaces on the configured port.
if __name__ == "__main__":
    app.run(port=port, host="0.0.0.0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
{ | ||
"Application":{ | ||
"port":"9900", | ||
"from":"", | ||
"to":"" | ||
}, | ||
"Redis":{ | ||
"address":"", | ||
"password":"", | ||
"port":"", | ||
"prefix":"" | ||
}, | ||
"DB":"", | ||
"Mysql":{ | ||
"address":"", | ||
"user":"", | ||
"password":"", | ||
"port":"", | ||
"db":"" | ||
}, | ||
"Sqlite":{ | ||
"path":"", | ||
"filename":"", | ||
"sizelimit":"" | ||
}, | ||
"Model":{ | ||
"model_path":"iryneko571/mt5-translation-ja_zh-game-small", | ||
"repetition_penalty":1.4, | ||
"batch_size":64, | ||
"max_length":256 | ||
}, | ||
"Translation":{ | ||
"lang":"<-ja2zh->" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>Upload and Send JSON File</title> | ||
</head> | ||
<body> | ||
|
||
<h2>Upload a JSON File</h2> | ||
|
||
<input type="file" id="fileInput" accept=".json"> | ||
<p id="statusMessage"></p> | ||
<p id="fileid" style="display:none"></p> | ||
<a id="downloadAnchorElem" style="display:none"></a> | ||
|
||
<script> | ||
// When a file is chosen, read it as text and hand the content to
// sendJsonToServer. Anything that does not look like JSON is rejected.
document.getElementById('fileInput').addEventListener('change', function(event) {
    const file = event.target.files[0];

    // Some platforms report an empty MIME type for .json files, so accept
    // the file when either the MIME type or the extension says JSON.
    const looksLikeJson = Boolean(file) &&
        (file.type === "application/json" || /\.json$/i.test(file.name));

    if (!looksLikeJson) {
        alert("Please upload a valid JSON file.");
        return;
    }

    const reader = new FileReader();

    reader.onload = function(e) {
        const content = e.target.result;
        sendJsonToServer(content);
    };

    // Surface read failures instead of leaving the page silent.
    reader.onerror = function() {
        document.getElementById('statusMessage').textContent = "Error reading file.";
        console.error("Error:", reader.error);
    };

    reader.readAsText(file);
});
|
||
// POST the JSON text to /translate/json and offer the translated JSON that
// comes back as a download named "scene.json".
function sendJsonToServer(jsonContent) {
    const statusElem = document.getElementById('statusMessage');

    fetch('/translate/json', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: jsonContent
    })
    .then(response => {
        // fetch() only rejects on network errors; turn HTTP error statuses
        // into a rejection so they reach the catch handler below.
        if (!response.ok) {
            throw new Error("Server responded with HTTP " + response.status);
        }
        return response.json();
    })
    .then(data => {
        statusElem.textContent = "File successfully sent!";

        // A Blob object URL avoids the length limits of data: URLs.
        const blob = new Blob([JSON.stringify(data, null, 2)], { type: "application/json" });
        const url = URL.createObjectURL(blob);

        const dlAnchorElem = document.getElementById('downloadAnchorElem');
        dlAnchorElem.setAttribute("href", url);
        dlAnchorElem.setAttribute("download", "scene.json");
        dlAnchorElem.click();

        // Release the object URL shortly after the download has been started.
        setTimeout(() => URL.revokeObjectURL(url), 1000);
    })
    .catch(error => {
        statusElem.textContent = "Error sending file.";
        console.error("Error:", error);
    });
}
</script> | ||
|
||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
from transformers import pipeline | ||
from transformers.pipelines.pt_utils import KeyDataset | ||
import re | ||
import copy | ||
from datetime import datetime | ||
from tqdm.auto import tqdm | ||
from datasets import Dataset | ||
|
||
#model_path="iryneko571/mt5-translation-ja_zh-game-small" | ||
''' | ||
setup pipeline for translation | ||
''' | ||
pipe=None | ||
def pipeinit(config):
    """Create the translation pipeline and store it in the module-level ``pipe``.

    Args:
        config: mapping providing ``model_path``, ``repetition_penalty``,
            ``batch_size`` and ``max_length``; ``model_path`` is used for
            both the model and the tokenizer.

    The start time and the elapsed initialisation time are printed.
    """
    global pipe
    started = datetime.now()
    print(started, "initiate model")
    pipe = pipeline(
        "translation",
        model=config["model_path"],
        tokenizer=config["model_path"],
        # cheap way to discourage repeated output
        repetition_penalty=config["repetition_penalty"],
        # only a reference value, do not set it too high
        batch_size=config["batch_size"],
        max_length=config["max_length"],
    )
    finished = datetime.now()
    print(finished, "init time", finished - started)
|
||
''' | ||
text preprocess and post process | ||
switch between different types of breaks | ||
will add different stuff such as \t later | ||
''' | ||
# preprocess translatables | ||
def preprocess(batch):
    """Normalise line breaks in every string of ``batch``.

    Each entry becomes a ``(text, tag)`` tuple. The tag records which kind of
    line break was found first, in this priority order:

    * ``"rn"`` - contains CRLF; every CRLF is replaced by a literal
      backslash-n
    * ``"nn"`` - already contains a literal backslash-n; text unchanged
    * ``"n"``  - contains LF; every LF is replaced by a literal backslash-n
    * ``"s"``  - none of the above ("safe"); text unchanged
    """

    def _tag(text):
        # Order matters: CRLF is checked before the literal form and bare LF.
        if "\r\n" in text:
            return text.replace("\r\n", "\\n"), "rn"
        if "\\n" in text:
            return text, "nn"
        if "\n" in text:
            return text.replace("\n", "\\n"), "n"
        return text, "s"

    return [_tag(text) for text in batch]
|
||
# process translated back to original format | ||
def postprocess(samples):
    """Turn ``(text, tag)`` tuples back into strings with their original breaks.

    ``"rn"`` restores CRLF and ``"n"`` restores LF from the literal
    backslash-n form; ``"nn"`` and ``"s"`` return the text untouched. For any
    other tag an error line is printed and the corresponding result entry is
    ``None``.
    """
    restored = []
    for text, tag in samples:
        if tag == "rn":
            restored.append(text.replace("\\n", "\r\n"))
        elif tag == "n":
            restored.append(text.replace("\\n", "\n"))
        elif tag in ("nn", "s"):
            restored.append(text)
        else:
            print(f"error determine the type of {text}")
            restored.append(None)
    return restored
|
||
''' | ||
initial batch translation | ||
will combine the batch and do the translation | ||
''' | ||
def liststream(list):
    """Yield every valid index of ``list`` in ascending order.

    Note that the indices are produced, not the elements themselves.
    """
    yield from range(len(list))
def translate_batch(batch, lang='<-ja2zh->'):
    """Translate a list of strings with the module-level pipeline ``pipe``.

    Args:
        batch: list of strings to translate.
        lang: tag placed in front of every text, separated by one space.

    Returns:
        A list of translated strings, one per input, with the line-break
        style of each input restored by ``postprocess``. An empty batch
        returns an empty list without touching the pipeline.
    """
    if not batch:
        return []

    # Normalise line breaks and remember each item's original style.
    samples = preprocess(batch)

    # Build the prompts from the *pre-processed* text so the model receives
    # the normalised line breaks that postprocess later converts back.
    trans_list = [f'{lang} {text}' for text, _ in samples]

    # Feed the prompts to the pipeline as a dataset.
    datalist = Dataset.from_dict({"text": trans_list})
    translated = []
    for out in tqdm(pipe(KeyDataset(datalist, "text")), total=len(datalist)):
        # Each ``out`` is flattened into the result list; presumably it holds
        # exactly one dict per input - TODO confirm against the pipeline's
        # generation settings, otherwise the pairing below goes out of step.
        translated.extend(out)

    # Pair each translation with the line-break tag of its source item.
    resultlist = [
        (item['translation_text'], tag)
        for item, (_, tag) in zip(translated, samples)
    ]

    # Restore the original line-break style.
    return postprocess(resultlist)
|