From 797bb53a14cfdd2c4217770140a3d321e47b9751 Mon Sep 17 00:00:00 2001
From: nl <3210346136@qq.com>
Date: Wed, 16 Mar 2022 21:37:49 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=9F=BA=E4=BA=8Ehtt?=
=?UTF-8?q?p=E5=8D=8F=E8=AE=AE=E7=9A=84=E6=96=B0=E8=B0=83=E7=94=A8?=
=?UTF-8?q?=E6=8E=A5=E5=8F=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
asrt_sdk/Recorder.py | 38 ++++++++-
asrt_sdk/SpeechRecognizer.py | 113 ++++++++++++++++++++++++-
asrt_sdk/__init__.py | 2 +
asrt_sdk/utils.py | 160 +++++++++++++++++++++++++++++++++++
setup.py | 34 +++++++-
5 files changed, 339 insertions(+), 8 deletions(-)
create mode 100644 asrt_sdk/utils.py
diff --git a/asrt_sdk/Recorder.py b/asrt_sdk/Recorder.py
index ea38081..cc39f69 100644
--- a/asrt_sdk/Recorder.py
+++ b/asrt_sdk/Recorder.py
@@ -1,9 +1,33 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2016-2099 Ailemon.net
+#
+# This file is part of ASRT Speech Recognition Tool Python SDK.
+#
+# ASRT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# ASRT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with ASRT. If not, see .
+# ============================================================================
+
+"""
+@author: nl8590687
+ASRT语音识别Python SDK 录音功能库
+"""
import wave
from pyaudio import PyAudio, paInt16
import struct
import threading
-
+import numpy as np
class AudioRecorder():
@@ -70,11 +94,19 @@ def SaveAudioToFile(self, filename):
pass
def GetAudioStream(self):
- return b"".join(self.__audio_buffers__)
+ bytesStream = b"".join(self.__audio_buffers__)
+ #print(bytesStream[-1000:])
+ #f=open('test0.bin','wb')
+ #f.write(bytesStream)
+ #f.close()
+ return bytesStream
def GetAudioSamples(self):
audio_bin_serials = self.GetAudioStream()
- return self.__audio_stream_to_short__(audio_bin_serials)
+ wave_data = np.fromstring(audio_bin_serials, dtype = np.short) # 将声音文件数据转换为数组矩阵形式
+ #print(wave_data[-1000:])
+ return wave_data
+ #return self.__audio_stream_to_short__(audio_bin_serials)
pass
def __audio_stream_to_short__(self, audio_stream):
diff --git a/asrt_sdk/SpeechRecognizer.py b/asrt_sdk/SpeechRecognizer.py
index 3021ee1..a8d190e 100644
--- a/asrt_sdk/SpeechRecognizer.py
+++ b/asrt_sdk/SpeechRecognizer.py
@@ -1,11 +1,122 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2016-2099 Ailemon.net
+#
+# This file is part of ASRT Speech Recognition Tool Python SDK.
+#
+# ASRT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# ASRT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with ASRT. If not, see .
+# ============================================================================
+
+"""
+@author: nl8590687
+ASRT语音识别Python SDK 语音识别接口调用类库
+"""
-from .Recorder import AudioRecorder
import threading
import time
import wave
import requests
import numpy as np
+from .Recorder import AudioRecorder
+from .utils import *
+
+def get_speech_recognizer(host:str, port:str, protocol:str):
+ '''
+ 获取一个ASRT语音识别SDK接口调用实例对象 \\
+ 参数:\\
+ host: 主机域名或IP.
+ port: 主机端口号.
+ protocol: 网络协议类型.
+ '''
+ if protocol.lower() == 'http' or protocol.lower() == 'https':
+ return HttpSpeechRecognizer(host, port, protocol)
+ return None
+
+
+class BaseSpeechRecognizer():
+ '''
+ ASRT语音识别SDK接口调用类基类
+ '''
+ def __init__(self, host:str, port:str, protocol:str):
+ self.host = host
+ self.port = port
+ self.protocol = protocol
+
+ def recognite(self, wav_data, frame_rate, channels, byte_width):
+ raise Exception("Method Unimpletment")
+
+ def recognite_speech(self, wav_data, frame_rate, channels, byte_width):
+ raise Exception("Method Unimpletment")
+
+ def recognite_language(self, sequence_pinyin):
+ raise Exception("Method Unimpletment")
+
+ def recognite_file(self, filename):
+ wave_data = read_wav_datas(filename)
+ str_data = wave_data.str_data
+ frame_rate = wave_data.sample_rate
+ channels = wave_data.channels
+ byte_width = wave_data.byte_width
+ return self.recognite(wav_data=str_data,
+ frame_rate=frame_rate,
+ channels=channels,
+ byte_width=byte_width
+ )
+
+class HttpSpeechRecognizer(BaseSpeechRecognizer):
+ '''
+ ASRT语音识别SDK基于HTTP协议接口调用类 \\
+ 参数: \\
+ host: 主机域名或IP.
+ port: 主机端口号.
+ protocol: 网络协议类型.
+ sub_path: 接口所在URL的子路径, 默认为""
+ '''
+ def __init__(self, host:str, port:str, protocol:str, sub_path:str=''):
+ super().__init__(host, port, protocol)
+ if protocol != 'http' and protocol != 'https':
+ raise Exception('Unsupport netword protocol `' + protocol +'`')
+ self._url_ = protocol + '://' + host + ':' + port
+ self.sub_path = sub_path
+
+ def recognite(self, wav_data, frame_rate:int, channels:int, byte_width:int) -> AsrtApiResponse:
+ request_body = AsrtApiSpeechRequest(wav_data, frame_rate, channels, byte_width)
+ headers = {'Content-Type': 'application/json'}
+ response_object = requests.post(self._url_ + self.sub_path + '/all', headers=headers, data=request_body.to_json())
+ response_body_dict = json.loads(response_object.text)
+ response_body = AsrtApiResponse()
+ response_body.from_json(**response_body_dict)
+ return response_body
+
+ def recognite_speech(self, wav_data, frame_rate, channels, byte_width):
+ request_body = AsrtApiSpeechRequest(wav_data, frame_rate, channels, byte_width)
+ headers = {'Content-Type': 'application/json'}
+ response_object = requests.post(self._url_ + self.sub_path + '/speech', headers=headers, data=request_body.to_json())
+ response_body_dict = json.loads(response_object.text)
+ response_body = AsrtApiResponse()
+ response_body.from_json(**response_body_dict)
+ return response_body
+
+ def recognite_language(self, sequence_pinyin):
+ request_body = AsrtApiLanguageRequest(sequence_pinyin)
+ headers = {'Content-Type': 'application/json'}
+ response_object = requests.post(self._url_ + self.sub_path + '/language', headers=headers, data=request_body.to_json())
+ response_body_dict = json.loads(response_object.text)
+ response_body = AsrtApiResponse()
+ response_body.from_json(**response_body_dict)
+ return response_body
class SpeechRecognizer():
def __init__(self, url_server = 'http://127.0.0.1:20000/', token_client = 'qwertasd'):
diff --git a/asrt_sdk/__init__.py b/asrt_sdk/__init__.py
index 0904006..f514dda 100644
--- a/asrt_sdk/__init__.py
+++ b/asrt_sdk/__init__.py
@@ -19,5 +19,7 @@
from . import Recorder, SpeechRecognizer
from .Recorder import AudioRecorder
from .SpeechRecognizer import SpeechRecognizer
+from .SpeechRecognizer import get_speech_recognizer, HttpSpeechRecognizer
+from .utils import read_wav_datas
__version__ = '1.0.0'
diff --git a/asrt_sdk/utils.py b/asrt_sdk/utils.py
new file mode 100644
index 0000000..ce510b8
--- /dev/null
+++ b/asrt_sdk/utils.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2016-2099 Ailemon.net
+#
+# This file is part of ASRT Speech Recognition Tool Python SDK.
+#
+# ASRT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# ASRT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with ASRT. If not, see .
+# ============================================================================
+
+"""
+@author: nl8590687
+ASRT语音识别Python SDK 基础库模块
+"""
+
+import base64
+import json
+import wave
+import numpy as np
+
+API_STATUS_CODE_OK = 200000 # OK
+API_STATUS_CODE_CLIENT_ERROR = 400000
+API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400001 # 请求数据格式错误
+API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400002 # 请求数据配置不支持
+API_STATUS_CODE_SERVER_ERROR = 500000
+API_STATUS_CODE_SERVER_ERROR_RUNNING = 500001 # 服务器运行中出错
+
+class AsrtApiSpeechRequest:
+ '''
+ ASRT语音识别基于HTTP协议的API接口请求类(声学模型)
+ '''
+ def __init__(self, samples, sample_rate, channels, byte_width):
+ self.samples = str(base64.urlsafe_b64encode(samples), encoding='utf-8')
+ self.sample_rate = sample_rate
+ self.channels = channels
+ self.byte_width = byte_width
+
+ def to_json(self):
+ '''
+ 类转json
+ '''
+ return json.dumps(self, default=lambda o: o.__dict__,
+ sort_keys=True)
+
+ def from_json(self, **entries):
+ '''
+ json转AsrtApiSpeechRequest
+ '''
+ self.__dict__.update(entries)
+
+ def __str__(self):
+ '''
+ AsrtApiSpeechRequest转为字符串
+ '''
+ return self.to_json()
+
+class AsrtApiLanguageRequest:
+ '''
+ ASRT语音识别基于HTTP协议的API接口请求类(声学模型)
+ '''
+ def __init__(self, sequence_pinyin):
+ self.sequence_pinyin = sequence_pinyin
+
+ def to_json(self):
+ '''
+ 类转json
+ '''
+ return json.dumps(self, default=lambda o: o.__dict__,
+ sort_keys=True)
+
+ def from_json(self, **entries):
+ '''
+ json转AsrtApiLanguageRequest
+ '''
+ self.__dict__.update(entries)
+
+ def __str__(self):
+ '''
+ AsrtApiLanguageRequest转为字符串
+ '''
+ return self.to_json()
+
+class AsrtApiResponse:
+ '''
+ ASRT语音识别基于HTTP协议的API接口响应类
+ '''
+ def __init__(self, status_code=0, status_message='', result=''):
+ self.status_code = status_code
+ self.status_message = status_message
+ self.result = result
+ def to_json(self):
+ '''
+ 类转json
+ '''
+ return json.dumps(self, default=lambda o: o.__dict__,
+ sort_keys=True)
+
+ def from_json(self, **entries):
+ '''
+ json转AsrtApiResponse
+ '''
+ self.__dict__.update(entries)
+
+ def __str__(self):
+ '''
+ AsrtApiResponse转为字符串
+ '''
+ return self.to_json()
+
+class WaveData:
+ '''
+ WAVE格式音频数据类
+ '''
+ def __init__(self, str_data, frame_rate, channels, byte_width) -> None:
+ self.str_data = str_data
+ self.sample_rate = frame_rate
+ self.channels = channels
+ self.byte_width = byte_width
+ self.filename = ''
+
+ def get_samples(self):
+ '''
+ str_data转short数组
+ '''
+ # 将声音文件数据转换为数组矩阵形式
+ wave_data = np.fromstring(self.str_data, dtype = np.short)
+ # 按照声道数将数组整形,单声道时候是一列数组,双声道时候是两列的矩阵
+ wave_data.shape = -1, self.channels
+ # 将矩阵转置
+ wave_data = wave_data.T
+ return wave_data
+
+ def set_filename(self, filename):
+ '''
+ 记录该wave文件名
+ '''
+ self.filename = filename
+
+def read_wav_datas(filename):
+ '''
+ 读取wave格式文件数据
+ '''
+ wav_file = wave.open(filename, 'rb')
+ num_frame = wav_file.getnframes()
+ str_data = wav_file.readframes(num_frame)
+ frame_rate = wav_file.getframerate()
+ channels = wav_file.getnchannels()
+ byte_width = wav_file.getsampwidth()
+ wav_file.close()
+ return WaveData(str_data, frame_rate, channels, byte_width)
diff --git a/setup.py b/setup.py
index 5e63a61..3221ca8 100644
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,28 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2016-2099 Ailemon.net
+#
+# This file is part of ASRT Speech Recognition Tool Python SDK.
+#
+# ASRT is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+# ASRT is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with ASRT. If not, see .
+# ============================================================================
+
+"""
+@author: nl8590687
+ASRT语音识别Python SDK 安装
+"""
+
try:
from setuptools import setup #enables develop
except ImportError:
@@ -24,12 +49,12 @@
'''
setup(name='asrt_sdk',
- version='1.0.0',
- description='A python client sdk for ASRT Deep-Learning-Based Auto Speech Recognition Toolkit',
+ version='1.1.0',
+ description='A python client caller sdk for ASRT Deep-Learning-Based Auto Speech Recognition Toolkit',
long_description=long_description,
long_description_content_type = 'text/markdown',
author='ailemon',
- author_email='ailemon@ailemon.me',
+ author_email='ailemon@ailemon.net',
license='GPL v3.0',
url='https://asrt.ailemon.net',
download_url = "https://pypi.python.org/pypi/asrt_sdk",
@@ -58,6 +83,7 @@
'GNU General Public License v3 or later (GPLv3+)'),
"Operating System :: OS Independent",
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.6'
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7'
]
)
\ No newline at end of file