Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/pipecat/services/aws/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pipecat.services import DeprecatedModuleProxy

from .config import *
from .llm import *
from .nova_sonic import *
from .stt import *
Expand Down
53 changes: 53 additions & 0 deletions src/pipecat/services/aws/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Configuration for the AWS Transcribe STT service."""

from typing import List, Optional

from pydantic import BaseModel

from pipecat.transcriptions.language import Language


class AWSInputParams(BaseModel):
"""Configuration parameters for the AWS Transcribe STT service.

Parameters:
sample_rate: Audio sample rate in Hz. Must be 8000 or 16000. Defaults to 16000.
language_code: Language for transcription. Cannot be used with identify_multiple_languages.
language_options: List of languages for multi-language identification. Required when identify_multiple_languages is True.
identify_multiple_languages: Enable multiple language identification. Defaults to False.
identify_language: Enable language identification. Defaults to False.
preferred_language: Preferred language from language_options to speed up identification.
enable_partial_results_stabilization: Enable stabilization of partial results. Defaults to True.
partial_results_stability: Stability level: "low" (faster, less stable) or "high" (slower, more stable). Defaults to "low".
media_encoding: Audio encoding format. Defaults to "linear16".
number_of_channels: Number of audio channels. Defaults to 1.
show_speaker_label: Enable speaker identification in transcription results. Defaults to False.
enable_channel_identification: Enable channel identification for multi-channel audio. Defaults to False.
vocabulary_name: Name of custom vocabulary to use for improved transcription accuracy.
vocabulary_filter_name: Name of vocabulary filter to apply for content filtering.

Note:
For real-time conversations, use partial_results_stability="low" for faster responses.
Multi-language identification may have higher latency than single language mode.
"""

sample_rate: int = 16000
language_code: Optional[Language] = None
language_options: Optional[List[Language]] = None
identify_multiple_languages: bool = False
identify_language: bool = False
preferred_language: Optional[Language] = None
enable_partial_results_stabilization: bool = True
partial_results_stability: str = "high"
media_encoding: str = "linear16"
number_of_channels: int = 1
show_speaker_label: bool = False
enable_channel_identification: bool = False
vocabulary_name: Optional[str] = None
vocabulary_filter_name: Optional[str] = None
Loading