Skip to content

Commit

Permalink
Merge In Recent Fixes (vocodedev#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mac Wilkinson authored and rjheeta committed Jun 8, 2024
1 parent 900ce77 commit e79522a
Show file tree
Hide file tree
Showing 24 changed files with 713 additions and 543 deletions.
610 changes: 315 additions & 295 deletions poetry.lock

Large diffs are not rendered by default.

36 changes: 12 additions & 24 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,46 +11,46 @@ homepage = "https://github.com/vocodedev/vocode-python"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
aiohttp = "^3.9.5"
anthropic = "^0.28.0"
azure-cognitiveservices-speech = "^1.37.0"
elevenlabs = "^1.2.2"
fastapi = "^0.111.0"
janus = "^1.0.0"
jinja2 = "^3.1.4"
jsonschema = "^4.22.0"
loguru = "^0.7.2"
numpy = "^1.26.4"
openai = "^1.30.5"
opentelemetry-sdk = "^1.24.0"
phonenumbers = "^8.13.37"
pydantic = "^2.7.2"
pydantic-settings = "^2.3.0"
pyht = "^0.0.28"
redis = "^5.0.4"
requests = "^2.32.3"
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1" }
sounddevice = "^0.4.7"
tiktoken = "^0.7.0"
uvicorn = "^0.30.0"
websockets = "^12.0"

# Agents
anthropic = { version = "^0.28.0", optional = true }
openai = { version = "^1.30.5", optional = true }
tiktoken = { version = "0.7.0", optional = true }

# Synthesizers
azure-cognitiveservices-speech = { version = "^1.37.0", optional = true }
elevenlabs = { version = "^1.2.2", optional = true }
google-cloud-texttospeech = { version = "^2.16.3", optional = true }
miniaudio = { version = "^1.59", optional = true }
nltk = { version = "^3.8.1", optional = true }
pvkoala = { version = "^2.0.1", optional = true }
pydub = { version = "^0.25.1", optional = true }
pyht = { version = "^0.0.28", optional = true }

# Transcribers
google-cloud-speech = { version = "^2.26.0", optional = true }

# Telephony
redis = { version = "^5.0.4", optional = true }
twilio = { version = "^9.1.0", optional = true }
vonage = { version = "^3.14.0", optional = true }

# Misc
langchain = { version = "^0.2.1", optional = true }
langchain-community = { version = "^0.2.1", optional = true }
sentry-sdk = { extras = ["fastapi"], version = "^2.3.1", optional = true }


[tool.poetry.group.lint.dependencies]
Expand All @@ -76,39 +76,27 @@ pytest-httpx = "^0.30.0"
pytest-mock = "^3.14.0"

[tool.poetry.extras]
agents = ["anthropic", "openai", "tiktoken"]
synthesizers = [
"azure-cognitiveservices-speech",
"elevenlabs",
"google-cloud-texttospeech",
"miniaudio",
"nltk",
"pvkoala",
"pydub",
"pyht",
]
transcribers = ["google-cloud-speech"]
telephony = ["twilio", "redis", "vonage"]
misc = ["langchain", "langchain-community", "sentry-sdk"]
telephony = ["twilio", "vonage"]
misc = ["langchain", "langchain-community"]
all = [
"anthropic",
"openai",
"tiktoken",
"azure-cognitiveservices-speech",
"elevenlabs",
"google-cloud-texttospeech",
"miniaudio",
"nltk",
"pvkoala",
"pydub",
"pyht",
"google-cloud-speech",
"twilio",
"redis",
"vonage",
"langchain",
"langchain-community",
"sentry-sdk",
]

[tool.mypy]
Expand Down
42 changes: 34 additions & 8 deletions quickstarts/streaming_conversation.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,46 @@
import asyncio
import signal

from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.logging import configure_pretty_logging
from vocode.streaming.agent.chat_gpt_agent import ChatGPTAgent
from vocode.streaming.models.agent import ChatGPTAgentConfig
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.models.synthesizer import AzureSynthesizerConfig
from vocode.streaming.models.transcriber import (
DeepgramTranscriberConfig,
PunctuationEndpointingConfig,
)
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.streaming.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.streaming.transcriber.deepgram_transcriber import DeepgramTranscriber

load_dotenv()
configure_pretty_logging()

from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.streaming.models.message import BaseMessage
from vocode.streaming.streaming_conversation import StreamingConversation

configure_pretty_logging()
class Settings(BaseSettings):
    """
    Settings for the streaming conversation quickstart.
    These parameters can be configured with environment variables.
    """

    # API credentials; the placeholder defaults must be replaced via env vars or .env.
    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"
    deepgram_api_key: str = "ENTER_YOUR_DEEPGRAM_API_KEY_HERE"

    # Azure Speech service region used by the synthesizer.
    azure_speech_region: str = "eastus"

    # This means a .env file can be used to override these settings
    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )


settings = Settings()


async def main():
Expand All @@ -38,15 +58,21 @@ async def main():
DeepgramTranscriberConfig.from_input_device(
microphone_input,
endpointing_config=PunctuationEndpointingConfig(),
)
api_key=settings.deepgram_api_key,
),
),
agent=ChatGPTAgent(
ChatGPTAgentConfig(
openai_api_key=settings.openai_api_key,
initial_message=BaseMessage(text="What up"),
prompt_preamble="""The AI is having a pleasant conversation about life""",
)
),
synthesizer=AzureSynthesizer(AzureSynthesizerConfig.from_output_device(speaker_output)),
synthesizer=AzureSynthesizer(
AzureSynthesizerConfig.from_output_device(speaker_output),
azure_speech_key=settings.azure_speech_key,
azure_speech_region=settings.azure_speech_region,
),
)
await conversation.start()
print("Conversation started, press Ctrl+C to end")
Expand Down
34 changes: 25 additions & 9 deletions quickstarts/turn_based_conversation.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
from dotenv import load_dotenv
from pydantic_settings import BaseSettings, SettingsConfigDict

from vocode import getenv
from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
from vocode.turn_based.synthesizer.azure_synthesizer import AzureSynthesizer
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
from vocode.turn_based.turn_based_conversation import TurnBasedConversation

load_dotenv()

# See https://api.elevenlabs.io/v1/voices
ADAM_VOICE_ID = "pNInz6obpgDQGcFmaJgB"
class Settings(BaseSettings):
    """
    Settings for the turn-based conversation quickstart.
    These parameters can be configured with environment variables.
    """

    # API credentials; the placeholder defaults must be replaced via env vars or .env.
    openai_api_key: str = "ENTER_YOUR_OPENAI_API_KEY_HERE"
    azure_speech_key: str = "ENTER_YOUR_AZURE_KEY_HERE"

    # Azure Speech service region used by the synthesizer.
    azure_speech_region: str = "eastus"

    # This means a .env file can be used to override these settings
    # ex: "OPENAI_API_KEY=my_key" will set openai_api_key over the default above
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
    )


settings = Settings()

if __name__ == "__main__":
(
Expand All @@ -23,15 +39,15 @@
conversation = TurnBasedConversation(
input_device=microphone_input,
output_device=speaker_output,
transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
transcriber=WhisperTranscriber(api_key=settings.openai_api_key),
agent=ChatGPTAgent(
system_prompt="The AI is having a pleasant conversation about life",
initial_message="Hello!",
api_key=getenv("OPENAI_API_KEY"),
api_key=settings.openai_api_key,
),
synthesizer=AzureSynthesizer(
api_key=getenv("AZURE_SPEECH_KEY"),
region=getenv("AZURE_SPEECH_REGION"),
api_key=settings.azure_speech_key,
region=settings.azure_speech_region,
voice_name="en-US-SteffanNeural",
),
)
Expand Down
4 changes: 4 additions & 0 deletions vocode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
import sentry_sdk
from loguru import logger

from vocode.meta import ensure_punkt_installed

environment = {}
logger.disable("vocode")

ensure_punkt_installed()


class ContextWrapper:
"""Context Variable Wrapper."""
Expand Down
15 changes: 15 additions & 0 deletions vocode/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from loguru import logger


def ensure_punkt_installed():
    """Ensure the NLTK 'punkt' tokenizer data is present, downloading it when missing."""
    # Import lazily so nltk is only required when this helper actually runs.
    from nltk.data import find

    try:
        find("tokenizers/punkt")
        return  # already installed, nothing to do
    except LookupError:
        pass

    from nltk import download

    # If not installed, download 'punkt'
    logger.info("Downloading 'punkt' tokenizer...")
    download("punkt")
    logger.info("'punkt' tokenizer downloaded successfully.")
81 changes: 66 additions & 15 deletions vocode/streaming/agent/chat_gpt_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

import sentry_sdk
from loguru import logger
from openai import AsyncAzureOpenAI, AsyncOpenAI
from openai import DEFAULT_MAX_RETRIES as OPENAI_DEFAULT_MAX_RETRIES
from openai import AsyncAzureOpenAI, AsyncOpenAI, NotFoundError, RateLimitError

from vocode import sentry_span_tags
from vocode.streaming.action.abstract_factory import AbstractActionFactory
Expand All @@ -27,6 +28,24 @@
ChatGPTAgentConfigType = TypeVar("ChatGPTAgentConfigType", bound=ChatGPTAgentConfig)


def instantiate_openai_client(agent_config: ChatGPTAgentConfig, model_fallback: bool = False):
    """Build the async OpenAI (or Azure OpenAI) client described by *agent_config*.

    When *model_fallback* is True, client-level retries are disabled
    (``max_retries=0``) so the first failure surfaces immediately and a
    fallback model can be applied by the caller.
    """
    # Retry budget is shared by both client flavors; compute it once.
    retries = 0 if model_fallback else OPENAI_DEFAULT_MAX_RETRIES

    if not agent_config.azure_params:
        if agent_config.openai_api_key is not None:
            logger.info("Using OpenAI API key override")
        return AsyncOpenAI(
            api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
            base_url="https://api.openai.com/v1",
            max_retries=retries,
        )

    azure = agent_config.azure_params
    return AsyncAzureOpenAI(
        azure_endpoint=azure.base_url,
        api_key=azure.api_key,
        api_version=azure.api_version,
        max_retries=retries,
    )


class ChatGPTAgent(RespondAgent[ChatGPTAgentConfigType]):
openai_client: Union[AsyncOpenAI, AsyncAzureOpenAI]

Expand All @@ -42,19 +61,9 @@ def __init__(
action_factory=action_factory,
**kwargs,
)
if agent_config.azure_params:
self.openai_client = AsyncAzureOpenAI(
azure_endpoint=agent_config.azure_params.base_url,
api_key=agent_config.azure_params.api_key,
api_version=agent_config.azure_params.api_version,
)
else:
if agent_config.openai_api_key is not None:
logger.info("Using OpenAI API key override")
self.openai_client = AsyncOpenAI(
api_key=agent_config.openai_api_key or os.environ["OPENAI_API_KEY"],
base_url="https://api.openai.com/v1",
)
self.openai_client = instantiate_openai_client(
agent_config, model_fallback=agent_config.llm_fallback is not None
)

if not self.openai_client.api_key:
raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
Expand Down Expand Up @@ -109,9 +118,37 @@ def get_model_name_for_tokenizer(self):
else:
return self.agent_config.azure_params.openai_model_name

async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
def apply_model_fallback(self, chat_parameters: Dict[str, Any]):
    """Switch this agent over to its configured fallback LLM.

    Mutates ``self.agent_config`` to describe the fallback model, rebuilds
    ``self.openai_client`` from the updated config, and rewrites
    ``chat_parameters["model"]`` in place so the retried request targets the
    fallback model. No-op when no fallback is configured.
    """
    if self.agent_config.llm_fallback is None:
        return
    if self.agent_config.llm_fallback.provider == "openai":
        self.agent_config.model_name = self.agent_config.llm_fallback.model_name
        # Falling back from Azure to plain OpenAI: clear azure_params BEFORE
        # re-instantiating so instantiate_openai_client builds an AsyncOpenAI client.
        if isinstance(self.openai_client, AsyncAzureOpenAI):
            self.agent_config.azure_params = None
    else:
        # Azure fallback: repoint the existing deployment at the fallback model.
        if self.agent_config.azure_params:
            self.agent_config.azure_params.deployment_name = (
                self.agent_config.llm_fallback.model_name
            )
        if isinstance(self.openai_client, AsyncOpenAI):
            # TODO: handle OpenAI fallback to Azure
            pass

    # model_fallback=False restores the default retry budget for the new client.
    self.openai_client = instantiate_openai_client(self.agent_config, model_fallback=False)
    chat_parameters["model"] = self.agent_config.llm_fallback.model_name

async def _create_openai_stream_with_fallback(
self, chat_parameters: Dict[str, Any]
) -> AsyncGenerator:
try:
stream = await self.openai_client.chat.completions.create(**chat_parameters)
except (NotFoundError, RateLimitError) as e:
logger.error(
f"{'Model not found' if isinstance(e, NotFoundError) else 'Rate limit error'} for model_name: {chat_parameters.get('model')}. Applying fallback.",
exc_info=True,
)
self.apply_model_fallback(chat_parameters)
stream = await self.openai_client.chat.completions.create(**chat_parameters)
except Exception as e:
logger.error(
f"Error while hitting OpenAI with chat_parameters: {chat_parameters}",
Expand All @@ -120,6 +157,20 @@ async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncG
raise e
return stream

async def _create_openai_stream(self, chat_parameters: Dict[str, Any]) -> AsyncGenerator:
    """Open a chat-completions stream, taking the fallback path when one is armed."""
    # The fallback path is only used when a fallback model is configured AND the
    # client was built with retries disabled (see instantiate_openai_client).
    fallback_armed = (
        self.agent_config.llm_fallback is not None and self.openai_client.max_retries == 0
    )
    if fallback_armed:
        return await self._create_openai_stream_with_fallback(chat_parameters)

    try:
        return await self.openai_client.chat.completions.create(**chat_parameters)
    except Exception as e:
        logger.error(
            f"Error while hitting OpenAI with chat_parameters: {chat_parameters}",
            exc_info=True,
        )
        raise e

def should_backchannel(self, human_input: str) -> bool:
return (
not self.is_first_response()
Expand Down
1 change: 0 additions & 1 deletion vocode/streaming/agent/default_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
AnthropicAgentConfig,
ChatGPTAgentConfig,
EchoAgentConfig,
LlamacppAgentConfig,
RESTfulUserImplementedAgentConfig,
)

Expand Down
Loading

0 comments on commit e79522a

Please sign in to comment.