', methods=["POST"])
def recognition_post(level):
    '''
    POST handler for the speech recognition API.

    The `level` path segment selects the pipeline stage:
      - 'speech':   acoustic model only (wav samples -> pinyin sequence)
      - 'language': language model only (pinyin sequence -> text)
      - 'all':      full pipeline (wav samples -> text)
    Any other value yields a client-error JSON response.

    The request body is JSON; wav samples travel as a urlsafe-base64
    string under the 'samples' key, together with 'sample_rate',
    'channels' and 'byte_width'.
    '''
    def _json_response(json_data):
        # Serialize the API response object and wrap it as a JSON HTTP response.
        buffer = json_data.to_json()
        print('output:', buffer)
        return Response(buffer, mimetype='application/json')

    def _decode_samples(request_data):
        # Decode the urlsafe-base64 sample stream described by the request fields.
        wavdata_bytes = base64.urlsafe_b64decode(request_data['samples'])
        return decode_wav_bytes(samples_data=wavdata_bytes,
                                channels=request_data['channels'],
                                byte_width=request_data['byte_width'])

    try:
        request_data = request.get_json()
        if level == 'speech':
            wavdata = _decode_samples(request_data)
            result = ms.recognize_speech(wavdata, request_data['sample_rate'])
            json_data = AsrtApiResponse(API_STATUS_CODE_OK, 'speech level')
            json_data.result = result
            return _json_response(json_data)
        if level == 'language':
            result = ml.SpeechToText(request_data['sequence_pinyin'])
            json_data = AsrtApiResponse(API_STATUS_CODE_OK, 'language level')
            json_data.result = result
            return _json_response(json_data)
        if level == 'all':
            wavdata = _decode_samples(request_data)
            result_speech = ms.recognize_speech(wavdata, request_data['sample_rate'])
            result = ml.SpeechToText(result_speech)
            json_data = AsrtApiResponse(API_STATUS_CODE_OK, 'all level')
            json_data.result = result
            return _json_response(json_data)
        # Unknown level: echo the input for debugging and report a client error.
        print('input:', request_data)
        return _json_response(AsrtApiResponse(API_STATUS_CODE_CLIENT_ERROR, ''))
    except Exception as except_general:
        # Service boundary: report any failure as a server-error JSON response.
        # NOTE(review): do not call request.get_json() again here — if the body
        # was malformed JSON it would re-raise and mask this error response.
        json_data = AsrtApiResponse(API_STATUS_CODE_SERVER_ERROR, str(except_general))
        buffer = json_data.to_json()
        return Response(buffer, mimetype='application/json')
+
+
if __name__ == '__main__':
    # for development env (Flask's built-in server, not production-safe)
    #app.run(host='0.0.0.0', port=20001)
    # for production env: serve through waitress, a production WSGI server
    import waitress
    waitress.serve(app, host='0.0.0.0', port=20001)
diff --git a/assets/default.html b/assets/default.html
new file mode 100644
index 0000000..a13d213
--- /dev/null
+++ b/assets/default.html
@@ -0,0 +1,25 @@
+
+
+
+
+ ASRT Speech Recognition API
+
+
+
+ ASRT Speech Recognition API
+ framework version: 1.0
+ If you see this page, the ASRT api server is successfully installed and working.
+ For online documentation and support please refer to ASRT Project Page.
+ Please call this web API by the POST method.
+ Thank you for using ASRT.
+
+
+
\ No newline at end of file
diff --git a/client_http.py b/client_http.py
new file mode 100644
index 0000000..2396d35
--- /dev/null
+++ b/client_http.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2016-2099 Ailemon.net
+#
+# This file is part of ASRT Speech Recognition Tool.
+#
+# ASRT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# ASRT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with ASRT. If not, see .
+# ============================================================================
+
+'''
+@author: nl8590687
+ASRT语音识别asrserver http协议测试专用客户端
+'''
+import base64
+import json
+import time
+import requests
+from utils.ops import read_wav_bytes
+
URL = 'http://127.0.0.1:20001/all'
# NOTE(review): hard-coded Windows path — point this at a local sample wav
# before running the client.
WAV_PATH = 'Y:\\SpeechData\\语音数据集\\data_thchs30\\train\\A11_0.wav'

wav_bytes, sample_rate, channels, sample_width = read_wav_bytes(WAV_PATH)

# The server expects the raw samples urlsafe-base64-encoded inside a JSON body.
datas = {
    'channels': channels,
    'sample_rate': sample_rate,
    'byte_width': sample_width,
    'samples': str(base64.urlsafe_b64encode(wav_bytes), encoding='utf-8')
}
headers = {'Content-Type': 'application/json'}

t0 = time.time()
# A timeout prevents the client from hanging forever when the server
# is unreachable or stalls mid-recognition.
r = requests.post(URL, headers=headers, data=json.dumps(datas), timeout=60)
t1 = time.time()
r.encoding = 'utf-8'

result = json.loads(r.text)
print(result)
print('time:', t1 - t0, 's')
diff --git a/utils/ops.py b/utils/ops.py
index 23ea543..b579f19 100644
--- a/utils/ops.py
+++ b/utils/ops.py
@@ -44,6 +44,18 @@ def read_wav_data(filename: str) -> tuple:
wave_data = wave_data.T # 将矩阵转置
return wave_data, framerate, num_channel, num_sample_width
def read_wav_bytes(filename: str) -> tuple:
    '''
    Read a wav file and return its raw PCM frame bytes plus stream metadata.

    Returns a tuple (frame_bytes, framerate, num_channels, sample_width):
      frame_bytes  -- raw PCM frames as a bytes object (all frames)
      framerate    -- sampling rate in Hz
      num_channels -- number of audio channels
      sample_width -- bytes per sample
    '''
    # Context manager guarantees the stream is closed even if reading fails.
    with wave.open(filename, "rb") as wav:
        num_frame = wav.getnframes()            # total number of frames
        num_channel = wav.getnchannels()        # channel count
        framerate = wav.getframerate()          # sampling rate in Hz
        num_sample_width = wav.getsampwidth()   # bytes per sample
        str_data = wav.readframes(num_frame)    # read every frame at once
    return str_data, framerate, num_channel, num_sample_width
def get_edit_distance(str1, str2) -> int:
'''
@@ -89,3 +101,59 @@ def visual_2D(img):
plt.imshow(img)
plt.colorbar(cax=None, ax=None, shrink=0.5)
plt.show()
+
def decode_wav_bytes(samples_data: bytes, channels: int = 1, byte_width: int = 2) -> 'np.ndarray':
    '''
    Decode raw PCM wav sample bytes into a numpy array.

    samples_data -- interleaved little-endian PCM sample bytes
    channels     -- number of interleaved channels (default 1)
    byte_width   -- bytes per sample: 2 (int16) or 4 (int32)

    Returns an array of shape (channels, num_frames): one row per channel.
    Raises ValueError for any other byte width.
    '''
    if byte_width == 2:
        numpy_type = np.int16
    elif byte_width == 4:
        numpy_type = np.int32
    else:
        raise ValueError('error: unsupported byte width `' + str(byte_width) + '`')
    # np.frombuffer replaces the deprecated np.fromstring; np.int16/np.int32
    # replace np.short and the removed np.int alias (numpy >= 1.24).
    wave_data = np.frombuffer(samples_data, dtype=numpy_type)
    # One column per channel, then transpose so each row is a channel.
    wave_data = wave_data.reshape(-1, channels).T
    return wave_data
+
def get_symbol_dict(dict_filename):
    '''
    Read the pinyin-to-hanzi dictionary file.

    Each non-empty line has the form `pinyin<TAB>characters`. Returns a
    dict mapping the pinyin string to the list of its candidate
    characters (one entry per character).
    '''
    # `with` guarantees the file handle is closed even on a read error.
    with open(dict_filename, 'r', encoding='UTF-8') as txt_obj:
        txt_lines = txt_obj.read().split('\n')

    dic_symbol = {}
    for line in txt_lines:
        if line == '':
            continue
        txt_l = line.split('\t')
        # list() splits the character string into one-character entries.
        dic_symbol[txt_l[0]] = list(txt_l[1])
    return dic_symbol
+
def get_language_model(model_language_filename):
    '''
    Read the language-model file.

    Each non-empty line has the form `token<TAB>value`; lines without a
    TAB are silently skipped. Returns a dict mapping token to its raw
    string value.
    '''
    # `with` guarantees the file handle is closed even on a read error.
    with open(model_language_filename, 'r', encoding='UTF-8') as txt_obj:
        txt_lines = txt_obj.read().split('\n')

    dic_model = {}
    for line in txt_lines:
        if line == '':
            continue
        txt_l = line.split('\t')
        if len(txt_l) == 1:
            # Malformed line without a value column — skip it.
            continue
        dic_model[txt_l[0]] = txt_l[1]
    return dic_model