forked from D1ngn/U-TasNet-Beam
-
Notifications
You must be signed in to change notification settings - Fork 0
/
asr_server_julius.py
86 lines (74 loc) · 3.56 KB
/
asr_server_julius.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# -*- coding: utf-8 -*-
### import ##############
import cherrypy
import subprocess
import os
import time
import soundfile as sf
import numpy as np
import socket
from io import BytesIO
### configure ###########
# Directory containing the Julius binary; the subprocess is launched with this as cwd.
JULIUS_HOME = os.path.join(os.environ['HOME'], "julius/julius")
# GMM acoustic model: recognizes a fixed command vocabulary such as
# "move forward" / "move backward".
JULIUS_EXEC = "./julius -C ../dictation-kit-4.5/main.jconf -C ../dictation-kit-4.5/am-gmm.jconf -nostrip -input file -outfile" # recognizes commands such as "move forward" / "move backward"
# JULIUS_EXEC = "./julius -C /Users/nagano.daichi/julius/dictation-kit-4.5/main.jconf -C /Users/nagano.daichi/julius/dictation-kit-4.5/am-dnn.jconf -dnnconf /Users/nagano.daichi/julius/dictation-kit-4.5/julius.dnnconf -nostrip -input file -outfile" # DNN model: recognizes arbitrary words (slower)
# TCP port the CherryPy server listens on.
SERVER_PORT = 8000
# Directory where the incoming WAV and the Julius result file are stored.
ASR_FILEPATH = os.path.join(os.environ['HOME'], 'NeuralBeamformer/recog_result/asr_result/')
# File name of the WAV written for Julius to decode.
ASR_IN = 'ch_asr.wav'
# File name of the recognition-result file Julius writes (-outfile).
ASR_RESULT = 'ch_asr.out'
# Expected line count of a complete result file; polling waits for exactly
# this many lines so a partially written (or empty) file is never read.
OUT_CHKNUM = 5 # for avoiding that the output file is empty
### class define ########
class ASRServer(object):
    """CherryPy handler that forwards uploaded audio to a Julius ASR process.

    A single Julius subprocess is started once, at class-definition time, and
    shared by every request.  Clients POST a serialized NumPy array (``.npy``)
    to ``/asr_julius``; the server saves it as a 16 kHz WAV file, sends the
    file path to Julius over stdin, polls until the result file is fully
    written, and returns the recognized text.
    """

    # Launch Julius as a long-lived child process; requests talk to it by
    # writing WAV file paths to its stdin (-input file mode).
    p = subprocess.Popen(JULIUS_EXEC, shell=True, cwd=JULIUS_HOME,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, close_fds=True)
    (stdouterr, stdin) = (p.stdout, p.stdin)

    def index(self):
        """Serve a minimal HTML upload form for manual testing."""
        return """
<html><body>
<h2>Julius Server</h2>
USAGE:<br />
- 16000Hz, wav(or raw)-file, big-endian, mono<br />
<br />
<form action="asr_julius" method="post" enctype="multipart/form-data">
filename: <input type="file" name="myFile" /><br />
<input type="submit" />
</form>
</body></html>
"""
    index.exposed = True

    def asr_julius(self, myFile):
        """Run speech recognition on the uploaded audio and return the text.

        ``myFile`` is the CherryPy multipart upload; its bytes are expected to
        be a NumPy ``.npy`` payload containing the audio samples.
        """
        # Decode the client's serialized NumPy array and persist it as a
        # 16 kHz WAV file where Julius can read it.
        audio_data = np.load(BytesIO(myFile.file.read()))
        sf.write(ASR_FILEPATH + ASR_IN, audio_data, 16000)

        # Remove any stale result so we never return a previous request's text.
        result_path = ASR_FILEPATH + ASR_RESULT  # hoisted out of the poll loop
        if os.path.exists(result_path):
            os.remove(result_path)

        # Tell Julius which file to decode (it reads file names from stdin).
        send_msg = ASR_FILEPATH + ASR_IN + '\n'
        self.p.stdin.write(send_msg.encode())
        self.p.stdin.flush()  # make sure Julius sees the request immediately

        # Poll until the result file exists AND is fully written (OUT_CHKNUM
        # lines), guarding against reading a half-written file.
        # NOTE(review): this loops forever if Julius dies or emits a different
        # line count — confirm whether a timeout is wanted.
        while True:
            if os.path.exists(result_path):
                # fix: close the file each poll instead of leaking a handle
                with open(result_path) as f:
                    if len(f.readlines()) == OUT_CHKNUM:
                        break
            time.sleep(0.1)

        # The first line holds the recognition; the first 12 characters are
        # a fixed prefix (e.g. "sentence1: ") that is stripped off.
        with open(result_path) as f:
            outlines = f.readline()[12:]
        return outlines
    asr_julius.exposed = True
if __name__ == "__main__":
    # Discover this machine's outward-facing IP address: "connecting" a UDP
    # socket to a public address selects the local interface without sending
    # any packets.
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(("8.8.8.8", 80))
        server_ip = s.getsockname()[0]
    finally:
        s.close()  # fix: the socket was previously never closed
    # Bind CherryPy to the detected address and configured port, then serve.
    cherrypy.config.update({'server.socket_port': SERVER_PORT,})
    cherrypy.config.update({'server.socket_host': server_ip,})
    cherrypy.quickstart(ASRServer())