Migrate to OpenAI 1.0
bkiat1123 committed Nov 22, 2023
1 parent 63439b8 commit f86a514
Showing 3 changed files with 66 additions and 53 deletions.
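
In essence, the migration swaps the 0.x module-level calls (openai.ChatCompletion / openai.Completion with a global openai.api_key) for explicit OpenAI / AsyncOpenAI client objects and attribute-style responses. A minimal before/after sketch (placeholder key, model, and prompt; the two halves target different SDK versions and are not meant to run together):

# Before: openai < 1.0, as the old provider code called it
import openai
openai.api_key = "sk-..."  # placeholder
resp = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
text = resp["choices"][0]["message"]["content"]  # dict-style access

# After: openai >= 1.0, mirroring the new provider code
from openai import OpenAI
client = OpenAI(api_key="sk-...")  # placeholder
resp = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
text = resp.choices[0].message.content  # responses are objects, not dicts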
110 changes: 61 additions & 49 deletions llms/providers/openai.py
@@ -1,9 +1,7 @@
from typing import AsyncGenerator, Dict, Generator, List, Optional, Union

import aiohttp
import tiktoken

import openai
from openai import AsyncOpenAI, OpenAI
import json

from ..results.result import AsyncStreamResult, Result, StreamResult
@@ -13,19 +11,18 @@
class OpenAIProvider(BaseProvider):
# cost is per million tokens
MODEL_INFO = {

"gpt-3.5-turbo": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": True},
"gpt-3.5-turbo-instruct": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": False},
"gpt-4": {"prompt": 30.0, "completion": 60.0, "token_limit": 8192, "is_chat": True},
"gpt-4-1106-preview": {"prompt": 10.0, "completion": 20.0, "token_limit": 128000, "is_chat": True},
}

def __init__(self, api_key, model=None):
openai.api_key = api_key
def __init__(self, api_key=None, model=None):
if model is None:
model = list(self.MODEL_INFO.keys())[0]
self.model = model
self.client = openai.ChatCompletion if self.is_chat_model else openai.Completion
self.client = OpenAI(api_key=api_key)
self.async_client = AsyncOpenAI(api_key=api_key)

@property
def is_chat_model(self) -> bool:
@@ -58,7 +55,7 @@ def _prepare_model_inputs(
self,
prompt: str,
history: Optional[List[dict]] = None,
system_message: str = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
stream: bool = False,
@@ -70,9 +67,13 @@ def _prepare_model_inputs(
if history:
messages = [*history, *messages]

if system_message:
if isinstance(system_message, str):
messages = [{"role": "system", "content": system_message}, *messages]

# users can pass multiple full system messages in dict form
elif isinstance(system_message, list):
messages = [*system_message, *messages]

model_inputs = {
"messages": messages,
"temperature": temperature,
@@ -115,7 +116,7 @@ def complete(
system_message: system messages in OpenAI format; each must have role and content keys.
It can also have a name key to include few-shot examples.
"""

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
@@ -124,35 +125,40 @@
max_tokens=max_tokens,
**kwargs,
)

with self.track_latency():
response = self.client.create(model=self.model, **model_inputs)

if self.is_chat_model:
response = self.client.chat.completions.create(model=self.model, **model_inputs)
else:
response = self.client.completions.create(model=self.model, **model_inputs)

is_func_call = response.choices[0].finish_reason == "function_call"
function_call = {}
completion = ""
if self.is_chat_model:
if is_func_call:
completion = {
function_call = {
"name": response.choices[0].message.function_call.name,
"arguments": json.loads(response.choices[0].message.function_call.arguments)
}
else:
completion = response.choices[0].message.content.strip()
else:
completion = response.choices[0].text.strip()

usage = response.usage

meta = {
"tokens_prompt": usage["prompt_tokens"],
"tokens_completion": usage["completion_tokens"],
"tokens_prompt": usage.prompt_tokens,
"tokens_completion": usage.completion_tokens,
"latency": self.latency,
}
return Result(
text=completion if not is_func_call else '',
text=completion,
model_inputs=model_inputs,
provider=self,
meta=meta,
function_call=completion if is_func_call else {}
function_call=function_call,
)

async def acomplete(
@@ -162,7 +168,6 @@ async def acomplete(
system_message: Optional[List[dict]] = None,
temperature: float = 0,
max_tokens: int = 300,
aiosession: Optional[aiohttp.ClientSession] = None,
**kwargs,
) -> Result:
"""
@@ -171,9 +176,6 @@ async def acomplete(
system_message: system messages in OpenAI format; each must have role and content keys.
It can also have a name key to include few-shot examples.
"""
if aiosession is not None:
openai.aiosession.set(aiosession)

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
@@ -184,7 +186,10 @@
)

with self.track_latency():
response = await self.client.acreate(model=self.model, **model_inputs)
if self.is_chat_model:
response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
else:
response = await self.async_client.completions.create(model=self.model, **model_inputs)

if self.is_chat_model:
completion = response.choices[0].message.content.strip()
@@ -194,8 +199,8 @@ async def acomplete(
usage = response.usage

meta = {
"tokens_prompt": usage["prompt_tokens"],
"tokens_completion": usage["completion_tokens"],
"tokens_prompt": usage.prompt_tokens,
"tokens_completion": usage.completion_tokens,
"latency": self.latency,
}
return Result(
@@ -208,8 +213,8 @@
def complete_stream(
self,
prompt: str,
history: Optional[List[tuple]] = None,
system_message: str = None,
history: Optional[List[dict]] = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
**kwargs,
@@ -229,35 +234,39 @@ def complete_stream(
stream=True,
**kwargs,
)
response = self.client.create(model=self.model, **model_inputs)

if self.is_chat_model:
response = self.client.chat.completions.create(model=self.model, **model_inputs)
else:
response = self.client.completions.create(model=self.model, **model_inputs)
stream = self._process_stream(response)

return StreamResult(stream=stream, model_inputs=model_inputs, provider=self)

def _process_stream(self, response: Generator) -> Generator:
if self.is_chat_model:
chunk_generator = (
chunk["choices"][0].get("delta", {}).get("content", "")
for chunk in response
chunk.choices[0].delta.content for chunk in response
)
else:
chunk_generator = (
chunk["choices"][0].get("text", "") for chunk in response
chunk.choices[0].text for chunk in response
)

while not (first_text := next(chunk_generator)):
continue
yield first_text.lstrip()
yield from chunk_generator
for chunk in chunk_generator:
if chunk is not None:
yield chunk

async def acomplete_stream(
self,
prompt: str,
history: Optional[List[tuple]] = None,
system_message: str = None,
history: Optional[List[dict]] = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
aiosession: Optional[aiohttp.ClientSession] = None,
**kwargs,
) -> AsyncStreamResult:
"""
@@ -266,9 +275,6 @@ async def acomplete_stream(
system_message: system messages in OpenAI format; each must have role and content keys.
It can also have a name key to include few-shot examples.
"""
if aiosession is not None:
openai.aiosession.set(aiosession)

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
Expand All @@ -279,7 +285,11 @@ async def acomplete_stream(
**kwargs,
)

response = await self.client.acreate(model=self.model, **model_inputs)
with self.track_latency():
if self.is_chat_model:
response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
else:
response = await self.async_client.completions.create(model=self.model, **model_inputs)
stream = self._aprocess_stream(response)
return AsyncStreamResult(
stream=stream, model_inputs=model_inputs, provider=self
@@ -288,21 +298,23 @@ async def acomplete_stream(
async def _aprocess_stream(self, response: AsyncGenerator) -> AsyncGenerator:
if self.is_chat_model:
while True:
first_completion = (await response.__anext__())["choices"][0].get("delta", {}).get("content", "")
first_completion = (await response.__anext__()).choices[0].delta.content
if first_completion:
yield first_completion.lstrip()
break

async for chunk in response:
completion = chunk["choices"][0].get("delta", {}).get("content", "")
yield completion
completion = chunk.choices[0].delta.content
if completion is not None:
yield completion
else:
while True:
first_completion = (await response.__anext__())["choices"][0].get("text", "")
first_completion = (await response.__anext__()).choices[0].text
if first_completion:
yield first_completion.lstrip()
break

async for chunk in response:
completion = chunk["choices"][0].get("text", "")
yield completion
completion = chunk.choices[0].text
if completion is not None:
yield completion
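
For reference, a hedged usage sketch of the migrated provider: the constructor and complete() signature come from the diff above, while the import path and the Result fields are read off the code shown here rather than documented elsewhere, so treat them as assumptions.

# a minimal sketch, assuming the package layout shown in this diff (llms/providers/openai.py)
from llms.providers.openai import OpenAIProvider

provider = OpenAIProvider(api_key="sk-...", model="gpt-3.5-turbo")  # placeholder key
result = provider.complete(
    prompt="Summarize this commit in one sentence.",
    system_message="You are a concise assistant.",
    temperature=0,
    max_tokens=100,
)
print(result.text)                    # completion text
print(result.meta["tokens_prompt"])   # now read from usage.prompt_tokens, not usage["prompt_tokens"]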
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,6 +1,6 @@
openai
tiktoken
anthropic==0.3
anthropic>=0.3
anthropic_bedrock
ai21
cohere
7 changes: 4 additions & 3 deletions setup.py
@@ -9,15 +9,15 @@

setup(
name="pyllms",
version="0.3.8.1",
version="0.3.9",
description="Minimal Python library to connect to LLMs (OpenAI, Anthropic, Google Palm2/Vertex, AI21, Cohere, Aleph-Alpha, HuggingfaceHub), with a built-in model performance benchmark.",
long_description=long_description,
long_description_content_type="text/markdown",
author="Vladimir Prelovac",
author_email="[email protected]",
packages=find_packages(),
install_requires=[
"openai",
"openai>=1",
"tiktoken",
"anthropic>=0.3",
"anthropic_bedrock",
@@ -28,7 +28,8 @@
"google-cloud-aiplatform",
"prettytable",
"protobuf~=3.20.3",
"grpcio~=1.54.2"
"grpcio~=1.54.2",
"google-generativeai",
],
extras_require={
"local": ["einops", "accelerate"]
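
With setup.py now pinning openai>=1, a quick post-upgrade sanity check might look like the snippet below; it only confirms that the 1.x SDK is installed.

# quick sanity check; assumes the updated dependency pin (openai>=1) is installed
import openai
from openai import OpenAI, AsyncOpenAI  # these client classes only exist in the 1.x SDK

print(openai.__version__)  # expect 1.x or later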
