Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

538 upgrade openai dependency #543

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
215 changes: 186 additions & 29 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ homepage = "https://github.com/vocodedev/vocode-python"
python = ">=3.8.1,<3.12"
pydub = "^0.25.1"
nltk = "^3.8.1"
openai = "^0.27.8"
openai = "^1.28.0"
sounddevice = "^0.4.6"
azure-cognitiveservices-speech = "^1.27.0"
websockets = "^11.0.2"
Expand Down
850 changes: 425 additions & 425 deletions tests/streaming/agent/test_utils.py

Large diffs are not rendered by default.

47 changes: 29 additions & 18 deletions vocode/streaming/agent/chat_gpt_agent.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
import logging
from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union

from typing import Any, Dict, List, Optional, Tuple, Union

import openai
from typing import AsyncGenerator, Optional, Tuple

import logging
from openai import AsyncAzureOpenAI, AsyncOpenAI, OpenAI, AzureOpenAI
from pydantic.v1 import BaseModel

from vocode import getenv
Expand Down Expand Up @@ -36,17 +32,28 @@ def __init__(
super().__init__(
agent_config=agent_config, action_factory=action_factory, logger=logger
)
self.async_openai_client: Union[AsyncAzureOpenAI, AsyncOpenAI]
self.openai_client: Union[AzureOpenAI, OpenAI]

if agent_config.azure_params:
openai.api_type = agent_config.azure_params.api_type
openai.api_base = getenv("AZURE_OPENAI_API_BASE")
openai.api_version = agent_config.azure_params.api_version
openai.api_key = getenv("AZURE_OPENAI_API_KEY")
self.async_openai_client = AsyncAzureOpenAI(
api_version=agent_config.azure_params.api_version,
api_key=getenv("AZURE_OPENAI_API_KEY"),
azure_endpoint=getenv("AZURE_OPENAI_API_BASE"),
)
self.openai_client = AzureOpenAI(
api_version=agent_config.azure_params.api_version,
api_key=getenv("AZURE_OPENAI_API_KEY"),
azure_endpoint=getenv("AZURE_OPENAI_API_BASE"),
)
else:
openai.api_type = "open_ai"
openai.api_base = "https://api.openai.com/v1"
openai.api_version = None
openai.api_key = openai_api_key or getenv("OPENAI_API_KEY")
if not openai.api_key:
self.async_openai_client = AsyncOpenAI(
api_key=openai_api_key or getenv("OPENAI_API_KEY"),
)
self.openai_client = OpenAI(
api_key=openai_api_key or getenv("OPENAI_API_KEY"),
)
if not self.openai_client.api_key:
raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
self.first_response = (
self.create_first_response(agent_config.expected_first_prompt)
Expand Down Expand Up @@ -104,7 +111,7 @@ def create_first_response(self, first_prompt):
]

parameters = self.get_chat_parameters(messages)
return openai.ChatCompletion.create(**parameters)
return self.openai_client.ChatCompletion.create(**parameters)

def attach_transcript(self, transcript: Transcript):
self.transcript = transcript
Expand All @@ -126,7 +133,9 @@ async def respond(
text = self.first_response
else:
chat_parameters = self.get_chat_parameters()
chat_completion = await openai.ChatCompletion.acreate(**chat_parameters)
chat_completion = await self.async_openai_client.chat.completions.create(
**chat_parameters
)
text = chat_completion.choices[0].message.content
self.logger.debug(f"LLM response: {text}")
return text, False
Expand Down Expand Up @@ -172,7 +181,9 @@ async def generate_response(
else:
chat_parameters = self.get_chat_parameters()
chat_parameters["stream"] = True
stream = await openai.ChatCompletion.acreate(**chat_parameters)
stream = await self.async_openai_client.chat.completions.create(
**chat_parameters
)
async for message in collate_response_async(
openai_get_tokens(stream), get_functions=True
):
Expand Down
2 changes: 1 addition & 1 deletion vocode/streaming/agent/llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async def respond(
return response, False

async def _stream_sentences(self, prompt):
stream = await openai.Completion.acreate(
stream = await openai.completions.create(
prompt=prompt,
max_tokens=self.agent_config.max_tokens,
temperature=self.agent_config.temperature,
Expand Down
26 changes: 14 additions & 12 deletions vocode/streaming/agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
Union,
)

from openai.openai_object import OpenAIObject
# from openai.openai_object import OpenAIObject
from vocode.streaming.models.actions import FunctionCall, FunctionFragment
from vocode.streaming.models.events import Sender
from vocode.streaming.models.transcript import (
Expand Down Expand Up @@ -75,29 +75,31 @@ async def collate_response_async(

async def openai_get_tokens(gen) -> "AsyncGenerator[Union[str, FunctionFragment], None]":
    """Yield tokens from an OpenAI v1 streaming response.

    Args:
        gen: Async iterator of streamed chunk objects, each exposing a
            ``.choices`` list (openai>=1.0 response objects).

    Yields:
        str: each text token from ``delta.text`` (legacy completions) or
            ``delta.content`` (chat completions).
        FunctionFragment: name/arguments pieces of a streamed function call.
    """
    async for event in gen:
        # v1 responses are attribute-based objects, not dicts — the old
        # ``event.get("choices", [])`` style would raise AttributeError.
        choices = event.choices
        if len(choices) == 0:
            continue
        choice = choices[0]
        if choice.finish_reason:
            # Stream is done producing tokens; stop consuming.
            break
        delta = choice.delta

        # Legacy (non-chat) completions stream text under ``delta.text``.
        if getattr(delta, "text", None):
            yield delta.text
        # Chat completions stream text under ``delta.content``.
        if getattr(delta, "content", None):
            yield delta.content
        elif getattr(delta, "function_call", None):
            # Function-call deltas arrive incrementally; missing pieces
            # default to the empty string so fragments always concatenate.
            yield FunctionFragment(
                name=getattr(delta.function_call, "name", None) or "",
                arguments=getattr(delta.function_call, "arguments", None) or "",
            )
Expand Down
39 changes: 25 additions & 14 deletions vocode/streaming/utils/goodbye_model.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import asyncio
import requests
from typing import Optional
import openai

from openai import AsyncOpenAI
import numpy as np
import requests

from vocode import getenv

Expand All @@ -29,8 +30,10 @@ def __init__(
),
openai_api_key: Optional[str] = None,
):
openai.api_key = openai_api_key or getenv("OPENAI_API_KEY")
if not openai.api_key:
self.async_openai_client = AsyncOpenAI(
api_key=openai_api_key or getenv("OPENAI_API_KEY")
)
if not self.async_openai_client.api_key:
raise ValueError("OPENAI_API_KEY must be set in environment or passed in")
self.embeddings_cache_path = embeddings_cache_path
self.goodbye_embeddings: Optional[np.ndarray] = None
Expand Down Expand Up @@ -65,20 +68,28 @@ async def is_goodbye(self, text: str) -> bool:
return np.max(similarity_results) > SIMILARITY_THRESHOLD

async def create_embedding(self, text) -> np.ndarray:
    """Create an embedding for the given text using the OpenAI API.

    Args:
        text (str): The text to embed.

    Returns:
        np.ndarray: The embedding vector as a numpy array.
    """
    # Prefer the Azure deployment name when configured; otherwise fall back
    # to the standard OpenAI embedding model.
    # NOTE(review): when AZURE_OPENAI_TEXT_EMBEDDING_ENGINE is set, this
    # passes an Azure deployment name to a plain (non-Azure) AsyncOpenAI
    # client, which will fail unless the constructor also switches to
    # AsyncAzureOpenAI — confirm Azure support before relying on it.
    model = getenv("AZURE_OPENAI_TEXT_EMBEDDING_ENGINE") or "text-embedding-ada-002"

    # openai>=1.0 returns an object, not a dict: the vector lives at
    # response.data[0].embedding.
    response = await self.async_openai_client.embeddings.create(
        input=text, model=model
    )
    return np.array(response.data[0].embedding)


if __name__ == "__main__":
from dotenv import load_dotenv
Expand Down
39 changes: 26 additions & 13 deletions vocode/streaming/vector_db/base_vector_db.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
from typing import Iterable, List, Optional, Tuple

import aiohttp
import openai
from openai import AsyncOpenAI
from langchain.docstore.document import Document

DEFAULT_OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
Expand All @@ -19,21 +20,33 @@ def __init__(
else:
self.aiohttp_session = aiohttp.ClientSession()
self.should_close_session_on_tear_down = True
self.async_openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

async def create_openai_embedding(
    self, text: str, model: str = DEFAULT_OPENAI_EMBEDDING_MODEL
) -> List[float]:
    """Create an embedding for the given text using the OpenAI API.

    Args:
        text: The text to embed.
        model: Embedding model name. Kept for backward compatibility with
            existing callers; the AZURE_OPENAI_TEXT_EMBEDDING_ENGINE
            environment variable still takes precedence when set, matching
            the pre-upgrade behavior.

    Returns:
        List[float]: The embedding vector as a list of floats.
    """
    resolved_model = os.getenv("AZURE_OPENAI_TEXT_EMBEDDING_ENGINE") or model

    # NOTE(review): an Azure engine name is passed here to a non-Azure
    # AsyncOpenAI client — confirm Azure deployments still work after the
    # openai v1 upgrade.
    response = await self.async_openai_client.embeddings.create(
        input=text, model=resolved_model
    )

    # response.data[0].embedding is already a list of floats in openai>=1.0;
    # list() keeps the declared return type explicit.
    return list(response.data[0].embedding)

async def add_texts(
self,
Expand Down
10 changes: 6 additions & 4 deletions vocode/turn_based/transcriber/whisper_transcriber.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
from typing import Optional
from pydub import AudioSegment
import io
import openai
from openai import OpenAI
from vocode import getenv

from vocode.turn_based.transcriber.base_transcriber import BaseTranscriber


class WhisperTranscriber(BaseTranscriber):
    """Turn-based transcriber backed by OpenAI's Whisper API."""

    def __init__(self, api_key: Optional[str] = None):
        # Fall back to the OPENAI_API_KEY environment variable so env-based
        # configuration (the pre-v1 behavior) keeps working; the explicit
        # argument takes precedence, matching the other agents in this repo.
        self.openai_client = OpenAI(api_key=api_key or getenv("OPENAI_API_KEY"))
        if not self.openai_client.api_key:
            raise ValueError("OpenAI API key not provided")

    def transcribe(self, audio_segment: AudioSegment) -> str:
        """Transcribe the given audio segment and return the recognized text."""
        in_memory_wav = io.BytesIO()
        audio_segment.export(in_memory_wav, format="wav")  # type: ignore
        in_memory_wav.seek(0)
        # The API infers the audio format from the file name.
        in_memory_wav.name = "whisper.wav"
        transcript = self.openai_client.audio.transcriptions.create(
            model="whisper-1", file=in_memory_wav
        )
        return transcript.text
Loading