From 145cbc21527d68613c6f4d0babe52763a8c7191f Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Fri, 5 Sep 2025 16:44:31 +0800 Subject: [PATCH 1/8] add tts param volume and speed --- src/zai/api_resource/audio/audio.py | 4 ++++ tests/integration_tests/test_audio.py | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 9523f47..92fe520 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -55,6 +55,8 @@ def speech( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + speed: float | None = 1.0, + volume: float | None = 1.0, ) -> HttpxBinaryResponseContent: """ Generate speech audio from text input @@ -80,6 +82,8 @@ def speech( 'sensitive_word_check': sensitive_word_check, 'request_id': request_id, 'user_id': user_id, + 'speed': speed, + 'volume': volume, } ) return self._post( diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index e143571..420f19b 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -10,12 +10,15 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore client = ZaiClient() # Fill in your own API Key try: - speech_file_path = Path(__file__).parent / 'asr1.wav' + speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( model='cogtts', input='Hello, welcome to Z.ai Open Platform', voice='female', - response_format='wav', + response_format='pcm', + encode_format='hex', + speed=1.0, + volume=1.0, ) response.stream_to_file(speech_file_path) From 175fda532116efe4e3d17c33fe71c8b5dd768fdb Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 11:52:42 +0800 Subject: [PATCH 2/8] feat: add encode_format param to audio.speech --- src/zai/api_resource/audio/audio.py | 2 ++ 
tests/integration_tests/test_audio.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 92fe520..bed126c 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -55,6 +55,7 @@ def speech( extra_headers: Headers | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + encode_format: str = None, speed: float | None = 1.0, volume: float | None = 1.0, ) -> HttpxBinaryResponseContent: @@ -79,6 +80,7 @@ def speech( 'input': input, 'voice': voice, 'response_format': response_format, + 'encode_format': encode_format, 'sensitive_word_check': sensitive_word_check, 'request_id': request_id, 'user_id': user_id, diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 420f19b..8545eda 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient() # Fill in your own API Key + client = ZaiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='') # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( @@ -16,13 +16,13 @@ def test_audio_speech(logging_conf): input='Hello, welcome to Z.ai Open Platform', voice='female', response_format='pcm', - encode_format='hex', + encode_format='base64', speed=1.0, volume=1.0, ) response.stream_to_file(speech_file_path) - except zai.core._errors.APIRequestFailedError as err: + except Exception as err: print(err) except zai.core._errors.APIInternalError as err: print(err) From e713eaaf305c16056f88a4468349767d7c3431c8 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 11:56:51 +0800 Subject: [PATCH 3/8] delete default host --- tests/integration_tests/test_audio.py | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index ec51248..7c97038 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='') # Fill in your own API Key + client = ZaiClient(base_url='', api_key='') # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( From 415d551c3279160fe427faf49a2acf716917b0eb Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 12:01:21 +0800 Subject: [PATCH 4/8] delete update --- tests/integration_tests/test_audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 7c97038..88dbb01 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -8,7 +8,7 @@ def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient(base_url='', api_key='') # Fill in your own API Key + client = ZaiClient() # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( From df3829b2b30c5a510e6a6672a1577fc1e7a42bfa Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 12:10:25 +0800 Subject: [PATCH 5/8] add stream response to audio.speech --- src/zai/api_resource/audio/audio.py | 3 +++ tests/integration_tests/test_audio.py | 1 + 2 files changed, 4 insertions(+) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index bed126c..2de7b47 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING, Mapping, Optional, 
cast import httpx +from httpx import stream from zai.core import ( NOT_GIVEN, @@ -58,6 +59,7 @@ def speech( encode_format: str = None, speed: float | None = 1.0, volume: float | None = 1.0, + stream: bool | None = False ) -> HttpxBinaryResponseContent: """ Generate speech audio from text input @@ -86,6 +88,7 @@ def speech( 'user_id': user_id, 'speed': speed, 'volume': volume, + 'stream': stream } ) return self._post( diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 88dbb01..e9ef33c 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -17,6 +17,7 @@ def test_audio_speech(logging_conf): voice='female', response_format='pcm', encode_format='base64', + stream=False, speed=1.0, volume=1.0, ) From 725ac1633b5daa8f927a3f5542898fdef74bcacb Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 15:24:29 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=E6=9A=82=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/zai/api_resource/audio/audio.py | 4 ++- src/zai/types/audio/__init__.py | 3 ++- src/zai/types/audio/audio_speech_chunk.py | 32 +++++++++++++++++++++++ tests/integration_tests/test_audio.py | 15 +++++++++-- 4 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 src/zai/types/audio/audio_speech_chunk.py diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 2de7b47..135739b 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -23,6 +23,8 @@ from zai.types.sensitive_word_check import SensitiveWordCheckRequest from .transcriptions import Transcriptions +from ...core import StreamResponse +from ...types.audio import AudioSpeechChunk if TYPE_CHECKING: from zai._client import ZaiClient @@ -60,7 +62,7 @@ def speech( speed: float | None = 1.0, volume: float | None = 1.0, stream: bool | None = False - ) -> HttpxBinaryResponseContent: + ) 
-> HttpxBinaryResponseContent | StreamResponse[AudioSpeechChunk]: """ Generate speech audio from text input diff --git a/src/zai/types/audio/__init__.py b/src/zai/types/audio/__init__.py index ad73c7b..f53dd5e 100644 --- a/src/zai/types/audio/__init__.py +++ b/src/zai/types/audio/__init__.py @@ -1,5 +1,6 @@ from .audio_customization_param import AudioCustomizationParam +from .audio_speech_chunk import AudioSpeechChunk from .audio_speech_params import AudioSpeechParams from .transcriptions_create_param import TranscriptionsParam -__all__ = ['AudioSpeechParams', 'AudioCustomizationParam', 'TranscriptionsParam'] +__all__ = ['AudioSpeechParams', 'AudioCustomizationParam', 'TranscriptionsParam', 'AudioSpeechChunk'] diff --git a/src/zai/types/audio/audio_speech_chunk.py b/src/zai/types/audio/audio_speech_chunk.py new file mode 100644 index 0000000..3d18c3b --- /dev/null +++ b/src/zai/types/audio/audio_speech_chunk.py @@ -0,0 +1,32 @@ +from typing import List, Optional, Dict, Any + +from ...core import BaseModel + +__all__ = [ + "AudioSpeechChunk", + "AudioError", + "AudioSpeechChoice", + "AudioSpeechDelta" +] + + +class AudioSpeechDelta(BaseModel): + content: Optional[str] = None + role: Optional[str] = None + + +class AudioSpeechChoice(BaseModel): + delta: AudioSpeechDelta + finish_reason: Optional[str] = None + index: int + +class AudioError(BaseModel): + code: Optional[str] = None + message: Optional[str] = None + + +class AudioSpeechChunk(BaseModel): + choices: List[AudioSpeechChoice] + request_id: Optional[str] = None + created: Optional[int] = None + error: Optional[AudioError] = None \ No newline at end of file diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index e9ef33c..9d6ecbd 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -1,3 +1,4 @@ +import base64 import logging import logging.config from pathlib import Path @@ -5,10 +6,12 @@ import zai from zai import ZaiClient 
+from src.zai import ZhipuAiClient + def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZaiClient() # Fill in your own API Key + client = ZhipuAiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='adf953faf621426da79103110eb41473.3FCVJZcTaq0Q7i3W') # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( @@ -21,7 +24,15 @@ def test_audio_speech(logging_conf): speed=1.0, volume=1.0, ) - response.stream_to_file(speech_file_path) + with open("output.pcm", "wb") as f: + for item in response: + choice = item.choices[0] + index = choice.index + finish_reason = choice.finish_reason + audio_delta = choice.delta.content + if finish_reason is not None: + break + f.write(base64.b64decode(audio_delta)) except zai.core._errors.APIRequestFailedError as err: print(err) From 85530c4c6be4dec3481374c423ae3ad76fbfc605 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 15:42:32 +0800 Subject: [PATCH 7/8] improve code --- src/zai/api_resource/audio/audio.py | 8 ++++---- src/zai/types/audio/audio_speech_params.py | 4 ++++ tests/integration_tests/test_audio.py | 10 +++++----- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/zai/api_resource/audio/audio.py b/src/zai/api_resource/audio/audio.py index 135739b..a3cd152 100644 --- a/src/zai/api_resource/audio/audio.py +++ b/src/zai/api_resource/audio/audio.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Mapping, Optional, cast import httpx -from httpx import stream from zai.core import ( NOT_GIVEN, @@ -23,8 +22,8 @@ from zai.types.sensitive_word_check import SensitiveWordCheckRequest from .transcriptions import Transcriptions -from ...core import StreamResponse -from ...types.audio import AudioSpeechChunk +from zai.core._streaming import StreamResponse +from zai.types.audio import AudioSpeechChunk if TYPE_CHECKING: from zai._client import ZaiClient @@ -85,7 +84,6 @@ def speech( 
'voice': voice, 'response_format': response_format, 'encode_format': encode_format, - 'sensitive_word_check': sensitive_word_check, 'request_id': request_id, 'user_id': user_id, 'speed': speed, @@ -98,6 +96,8 @@ def speech( body=maybe_transform(body, AudioSpeechParams), options=make_request_options(extra_headers=extra_headers, extra_body=extra_body, timeout=timeout), cast_type=HttpxBinaryResponseContent, + stream=stream or False, + stream_cls=StreamResponse[AudioSpeechChunk] ) def customization( diff --git a/src/zai/types/audio/audio_speech_params.py b/src/zai/types/audio/audio_speech_params.py index c8eb538..8843ab3 100644 --- a/src/zai/types/audio/audio_speech_params.py +++ b/src/zai/types/audio/audio_speech_params.py @@ -29,3 +29,7 @@ class AudioSpeechParams(TypedDict, total=False): sensitive_word_check: Optional[SensitiveWordCheckRequest] request_id: str user_id: str + encode_format: str + speed: float + volume: float + stream: bool diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 9d6ecbd..9a10e9a 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -6,12 +6,12 @@ import zai from zai import ZaiClient -from src.zai import ZhipuAiClient +from zai import ZhipuAiClient def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZhipuAiClient(base_url='https://open.bigmodel.cn/api/paas/v4', api_key='adf953faf621426da79103110eb41473.3FCVJZcTaq0Q7i3W') # Fill in your own API Key + client = ZhipuAiClient() # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech( @@ -20,7 +20,7 @@ def test_audio_speech(logging_conf): voice='female', response_format='pcm', encode_format='base64', - stream=False, + stream=True, speed=1.0, volume=1.0, ) @@ -29,9 +29,9 @@ def test_audio_speech(logging_conf): choice = item.choices[0] index = choice.index finish_reason = choice.finish_reason - 
audio_delta = choice.delta.content - if finish_reason is not None: + if choice.delta is None: break + audio_delta = choice.delta.content f.write(base64.b64decode(audio_delta)) except zai.core._errors.APIRequestFailedError as err: From ee8d248e2d97ff99fca3ca269fb9a728f1b55660 Mon Sep 17 00:00:00 2001 From: yuhongxiao Date: Sun, 28 Sep 2025 15:50:37 +0800 Subject: [PATCH 8/8] change test code --- tests/integration_tests/test_audio.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/integration_tests/test_audio.py b/tests/integration_tests/test_audio.py index 9a10e9a..3dbcf66 100644 --- a/tests/integration_tests/test_audio.py +++ b/tests/integration_tests/test_audio.py @@ -6,12 +6,9 @@ import zai from zai import ZaiClient -from zai import ZhipuAiClient - - def test_audio_speech(logging_conf): logging.config.dictConfig(logging_conf) # type: ignore - client = ZhipuAiClient() # Fill in your own API Key + client = ZaiClient() # Fill in your own API Key try: speech_file_path = Path(__file__).parent / 'asr1.pcm' response = client.audio.speech(