diff --git a/docs/reference/models/openai.md b/docs/reference/models/openai.md
index bd1123a59..5ddd4a457 100644
--- a/docs/reference/models/openai.md
+++ b/docs/reference/models/openai.md
@@ -1,4 +1,4 @@
-# Generate text with the OpenAI API
+# Generate text with the OpenAI and compatible APIs
 
 !!! Installation
 
@@ -16,16 +16,33 @@
 print(type(model))
 # OpenAI
 ```
+Outlines also supports Azure OpenAI models. Pass the model name first; it is used to pick the matching `tiktoken` tokenizer:
 
-It is possible to pass a system message to the model when initializing it:
 ```python
 from outlines import models
 
-model = models.openai("gpt-4", system_prompt="You are a useful assistant")
+model = models.azure_openai(
+    "gpt-4",
+    api_version="2023-07-01-preview",
+    azure_endpoint="https://example-endpoint.openai.azure.com",
+)
+```
+
+More generally, you can use any API client compatible with the OpenAI interface by passing an instance of the client, a configuration, and optionally the corresponding tokenizer (required if you want to be able to use `outlines.generate.choice`):
+
+```python
+from openai import AsyncOpenAI
+import tiktoken
+
+from outlines.models.openai import OpenAI, OpenAIConfig
+
+config = OpenAIConfig(model="gpt-4")
+client = AsyncOpenAI()
+tokenizer = tiktoken.encoding_for_model("gpt-4")
+
+model = OpenAI(client, config, tokenizer)
 ```
 
-This message will be used for every subsequent use of the model:
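+The resulting model behaves like any other Outlines model. As a minimal sketch (the prompt and the choices below are purely illustrative):
+
+```python
+from outlines import generate
+
+result = model("Say hello in French.")
+
+# `generate.choice` relies on the tokenizer passed above
+generator = generate.choice(model, ["Positive", "Negative"])
+sentiment = generator("Review: the service was excellent!")
+```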
 
 ## Monitoring API use
diff --git a/outlines/models/__init__.py b/outlines/models/__init__.py
index ca3335d08..15b370a85 100644
--- a/outlines/models/__init__.py
+++ b/outlines/models/__init__.py
@@ -7,12 +7,10 @@
 """
 from typing import Union
 
-from .azure import AzureOpenAI, azure_openai
 from .exllamav2 import ExLlamaV2Model, exl2
 from .llamacpp import LlamaCpp, llamacpp
 from .mamba import Mamba, mamba
-from .openai import OpenAI, openai
-from .openai_compatible import OpenAICompatibleAPI, openai_compatible_api
+from .openai import OpenAI, azure_openai, openai
 from .transformers import Transformers, transformers
 
 LogitsGenerator = Union[Transformers, LlamaCpp, ExLlamaV2Model, Mamba]
diff --git a/outlines/models/azure.py b/outlines/models/azure.py
deleted file mode 100644
index 9d93539a5..000000000
--- a/outlines/models/azure.py
+++ /dev/null
@@ -1,119 +0,0 @@
-"""Integration with Azure OpenAI's API."""
-import functools
-import os
-from dataclasses import replace
-from typing import Optional
-
-from outlines.models.openai import OpenAI, OpenAIConfig
-
-__all__ = ["AzureOpenAI", "azure_openai"]
-
-
-AZURE_API_VERSION = "2023-05-15"
-
-
-class AzureOpenAI(OpenAI):
-    def __init__(
-        self,
-        model_name: str,
-        deployment_name: str,
-        azure_endpoint: Optional[str] = None,
-        api_key: Optional[str] = None,
-        max_retries: int = 6,
-        timeout: Optional[float] = None,
-        system_prompt: Optional[str] = None,
-        config: Optional[OpenAIConfig] = None,
-    ):
-        """Create an `AzureOpenAI` instance.
-
-        Parameters
-        ----------
-        model_name
-            The name of the OpenAI model being used
-        deployment_name
-            The name of your Azure OpenAI deployment
-        api_key
-            Secret key to use with the OpenAI API. One can also set the
-            `OPENAI_API_KEY` environment variable, or the value of
-            `openai.api_key`.
-        max_retries
-            The maximum number of retries when calls to the API fail.
-        timeout
-            Duration after which the request times out.
-        system_prompt
-            The content of the system message that precedes the user's prompt.
-        config
-            An instance of `OpenAIConfig`. Can be useful to specify some
-            parameters that cannot be set by calling this class' methods.
-
-        """
-        try:
-            import openai
-        except ImportError:
-            raise ImportError(
-                "The `openai` library needs to be installed in order to use Outlines' Azure OpenAI integration."
-            )
-        try:
-            client = openai.OpenAI()
-            client.models.retrieve(model_name)
-        except openai.NotFoundError:
-            raise ValueError(
-                "Invalid model_name. Check openai models list at https://platform.openai.com/docs/models"
-            )
-
-        self.model_name = model_name
-
-        if api_key is None:
-            if os.getenv("AZURE_OPENAI_KEY") is not None:
-                api_key = os.getenv("AZURE_OPENAI_KEY")
-            elif openai.api_key is not None:
-                api_key = openai.api_key
-            else:
-                raise ValueError(
-                    "You must specify an API key to use the Azure OpenAI API integration."
-                )
-        if azure_endpoint is None:
-            if os.getenv("AZURE_OPENAI_ENDPOINT") is not None:
-                azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
-            else:
-                raise ValueError(
-                    "You must specify an API base to use the Azure OpenAI API integration."
-                )
-
-        if config is not None:
-            self.config = replace(config, model=deployment_name)  # type: ignore
-        else:
-            self.config = OpenAIConfig(model=deployment_name)
-
-        # This is necesssary because of an issue with the OpenAI API.
-        # Status updates: https://github.com/openai/openai-python/issues/769
-        self.create_client = functools.partial(
-            openai.AsyncAzureOpenAI,
-            azure_endpoint=azure_endpoint,
-            api_key=api_key,
-            api_version=AZURE_API_VERSION,
-            max_retries=max_retries,
-            timeout=timeout,
-        )
-
-        self.system_prompt = system_prompt
-
-        # We count the total number of prompt and generated tokens as returned
-        # by the OpenAI API, summed over all the requests performed with this
-        # model instance.
-        self.prompt_tokens = 0
-        self.completion_tokens = 0
-
-    @property
-    def tokenizer(self):
-        try:
-            import tiktoken
-        except ImportError:
-            raise ImportError(
-                "The `tiktoken` library needs to be installed in order to choose `outlines.models.openai` with `is_in`"
-            )
-
-        return tiktoken.encoding_for_model(self.model_name)
-
-
-azure_openai = AzureOpenAI
diff --git a/outlines/models/openai.py b/outlines/models/openai.py
index 4f9b5a869..8de138fa1 100644
--- a/outlines/models/openai.py
+++ b/outlines/models/openai.py
@@ -1,20 +1,15 @@
 """Integration with OpenAI's API."""
 import functools
-import os
-import textwrap
 from dataclasses import asdict, dataclass, field, replace
 from itertools import zip_longest
-from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set, Tuple, Union
+from typing import Callable, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
 
 from outlines.base import vectorize
 from outlines.caching import cache
 
-__all__ = ["OpenAI", "openai"]
-
-if TYPE_CHECKING:
-    from openai import AsyncOpenAI
+__all__ = ["OpenAI", "openai", "azure_openai"]
 
 
 @dataclass(frozen=True)
@@ -27,7 +22,7 @@ class OpenAIConfig:
 
     Properties
     ----------
-    model_name
+    model
         The name of the model. Available models can be found on OpenAI's website.
     frequence_penalty
        Number between 2.0 and -2.0.
        Positive values penalize new tokens based on
@@ -66,7 +61,7 @@ class OpenAIConfig:
     response_format: Optional[Dict[str, str]] = None
     seed: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = None
-    temperature: Optional[float] = None
+    temperature: float = 1.0
     top_p: int = 1
     user: str = field(default_factory=str)
 
@@ -76,74 +71,31 @@ class OpenAI:
 
     def __init__(
         self,
-        model_name: str,
-        api_key: Optional[str] = None,
-        max_retries: int = 6,
-        timeout: Optional[float] = None,
-        system_prompt: Optional[str] = None,
-        config: Optional[OpenAIConfig] = None,
+        client,
+        config,
+        tokenizer=None,
     ):
         """Create an `OpenAI` instance.
 
+        This class supports the standard OpenAI API, the Azure OpenAI API, as
+        well as compatible APIs that rely on the OpenAI client.
+
         Parameters
         ----------
-        model_name
-            Model to use, as defined in OpenAI's documentation
-        api_key
-            Secret key to use with the OpenAI API. One can also set the
-            `OPENAI_API_KEY` environment variable, or the value of
-            `openai.api_key`.
-        max_retries
-            The maximum number of retries when calls to the API fail.
-        timeout
-            Duration after which the request times out.
-        system_prompt
-            The content of the system message that precedes the user's prompt.
+        client
+            An instance of the API's async client.
         config
             An instance of `OpenAIConfig`. Can be useful to specify some
             parameters that cannot be set by calling this class' methods.
+        tokenizer
+            The tokenizer associated with the model the client connects to.
 
         """
-        try:
-            import openai
-        except ImportError:
-            raise ImportError(
-                "The `openai` library needs to be installed in order to use Outlines' OpenAI integration."
-            )
-
-        if api_key is None:
-            if os.getenv("OPENAI_API_KEY") is not None:
-                api_key = os.getenv("OPENAI_API_KEY")
-            elif openai.api_key is not None:
-                api_key = openai.api_key
-            else:
-                raise ValueError(
-                    "You must specify an API key to use the OpenAI API integration."
-                )
-        try:
-            client = openai.OpenAI(api_key=api_key)
-            client.models.retrieve(model_name)
-        except openai.NotFoundError:
-            raise ValueError(
-                "Invalid model_name. Check openai models list at https://platform.openai.com/docs/models"
-            )
-
-        if config is not None:
-            self.config = replace(config, model=model_name)  # type: ignore
-        else:
-            self.config = OpenAIConfig(model=model_name)
-
-        # This is necesssary because of an issue with the OpenAI API.
-        # Status updates: https://github.com/openai/openai-python/issues/769
-        self.create_client = functools.partial(
-            openai.AsyncOpenAI,
-            api_key=api_key,
-            max_retries=max_retries,
-            timeout=timeout,
-        )
-
-        self.system_prompt = system_prompt
+        self.client = client
+        self.tokenizer = tokenizer
+        self.config = config
 
         # We count the total number of prompt and generated tokens as returned
         # by the OpenAI API, summed over all the requests performed with this
@@ -157,7 +109,8 @@ def __call__(
         self,
         prompt: Union[str, List[str]],
         max_tokens: Optional[int] = None,
         stop_at: Optional[Union[List[str], str]] = None,
         *,
-        temperature: float = 1.0,
+        system_prompt: Optional[str] = None,
+        temperature: Optional[float] = None,
         samples: Optional[int] = None,
     ) -> np.ndarray:
         """Call the OpenAI API to generate text.
 
@@ -168,6 +121,11 @@ def __call__(
         prompt
             A string or list of strings that will be used to prompt the model
         max_tokens
             The maximum number of tokens to generate
+        stop_at
+            A string or list of strings at which the generation stops.
+        system_prompt
+            The content of the system message that precedes the user's prompt.
         temperature
             The value of the temperature used to sample tokens
         samples
             The number of completions to generate for each prompt
-        stop_at
-            Up to 4 words where the API will stop the completion.
 
         """
@@ -176,52 +134,36 @@ def __call__(
+        if max_tokens is None:
+            max_tokens = self.config.max_tokens
+        if stop_at is None:
+            stop_at = self.config.stop
+        if temperature is None:
+            temperature = self.config.temperature
         if samples is None:
             samples = self.config.n
 
-        config = replace(self.config, max_tokens=max_tokens, n=samples, stop=stop_at)  # type: ignore
+        config = replace(self.config, max_tokens=max_tokens, temperature=temperature, n=samples, stop=stop_at)  # type: ignore
 
-        if isinstance(stop_at, list) and len(stop_at) > 4:
-            raise NotImplementedError(
-                "The OpenAI API supports at most 4 stop sequences."
-            )
-
-        if "text-" in self.config.model:
-            raise NotImplementedError(
-                textwrap.dedent(
-                    "Most models that support the legacy completion endpoints will be "
-                    "deprecated on January 2024. Use Chat models instead.\n"
-                    "The list of chat models is available at https://platform.openai.com/docs/guides/text-generation."
-                )
-            )
-        if "gpt-" in self.config.model:
-            client = self.create_client()
-            response, prompt_tokens, completion_tokens = generate_chat(
-                prompt, self.system_prompt, client, config
-            )
-            self.prompt_tokens += prompt_tokens
-            self.completion_tokens += completion_tokens
+        response, prompt_tokens, completion_tokens = generate_chat(
+            prompt, system_prompt, self.client, config
+        )
+        self.prompt_tokens += prompt_tokens
+        self.completion_tokens += completion_tokens
 
-            return response
+        return response
 
     def stream(self, *args, **kwargs):
         raise NotImplementedError(
             "Streaming is currently not supported for the OpenAI API"
         )
 
-    @property
-    def tokenizer(self):
-        try:
-            import tiktoken
-        except ImportError:
-            raise ImportError(
-                "The `tiktoken` library needs to be installed in order to choose `outlines.models.openai` with `is_in`"
-            )
-
-        return tiktoken.encoding_for_model(self.config.model)
-
     def generate_choice(
-        self, prompt: str, choices: List[str], max_tokens: Optional[int] = None
+        self,
+        prompt: str,
+        choices: List[str],
+        max_tokens: Optional[int] = None,
+        system_prompt: Optional[str] = None,
     ) -> str:
         """Call the OpenAI API to generate one of several choices.
 
@@ -233,8 +175,15 @@ def generate_choice(
             The list of strings between which we ask the model to choose
         max_tokens
             The maximum number of tokens to generate
+        system_prompt
+            The content of the system message that precedes the user's prompt.
 
         """
+        if self.tokenizer is None:
+            raise ValueError(
+                "You must initialize the `OpenAI` class with a tokenizer to use `outlines.generate.choice`"
+            )
+
         config = replace(self.config, max_tokens=max_tokens)
 
         greedy = False
@@ -262,9 +211,8 @@ def generate_choice(
 
             config = replace(config, logit_bias=mask, max_tokens=max_tokens_left)
 
-            client = self.create_client()
             response, prompt_tokens, completion_tokens = generate_chat(
-                prompt, self.system_prompt, client, config
+                prompt, system_prompt, self.client, config
             )
             self.prompt_tokens += prompt_tokens
             self.completion_tokens += completion_tokens
@@ -316,7 +264,7 @@ def __repr__(self):
 async def generate_chat(
     prompt: str,
     system_prompt: Union[str, None],
-    client: "AsyncOpenAI",
+    client,
     config: OpenAIConfig,
 ) -> Tuple[np.ndarray, int, int]:
     """Call OpenAI's Chat Completion API.
@@ -340,14 +288,13 @@ async def generate_chat(
 
     """
 
+    @error_handler
     @cache()
     async def call_api(prompt, system_prompt, config):
         responses = await client.chat.completions.create(
             messages=system_message + user_message,
             **asdict(config),  # type: ignore
         )
-        await client.close()
-
         return responses.model_dump()
 
     system_message = (
@@ -365,9 +312,6 @@ async def call_api(prompt, system_prompt, config):
     return results, usage["prompt_tokens"], usage["completion_tokens"]
 
 
-openai = OpenAI
-
-
 def find_longest_intersection(response: List[int], choice: List[int]) -> List[int]:
     """Find the longest intersection between the response and the choice."""
     for i, (token_r, token_c) in enumerate(zip_longest(response, choice)):
@@ -468,3 +412,57 @@ def call(*args, **kwargs):
             raise e
 
     return call
+
+
+def openai(
+    model_name: str,
+    api_key: Optional[str] = None,
+    config: Optional[OpenAIConfig] = None,
+):
+    """Create an `OpenAI` instance backed by the official OpenAI API.
+
+    `model_name` is also used to select the matching `tiktoken` tokenizer.
+    """
+    try:
+        import tiktoken
+        from openai import AsyncOpenAI
+    except ImportError:
+        raise ImportError(
+            "The `openai` and `tiktoken` libraries need to be installed in order to use Outlines' OpenAI integration."
+        )
+
+    if config is not None:
+        config = replace(config, model=model_name)  # type: ignore
+    else:
+        config = OpenAIConfig(model=model_name)
+
+    client = AsyncOpenAI(api_key=api_key)
+    tokenizer = tiktoken.encoding_for_model(model_name)
+
+    return OpenAI(client, config, tokenizer)
+
+
+def azure_openai(
+    model_name: str,
+    azure_endpoint: Optional[str] = None,
+    api_version: Optional[str] = None,
+    api_key: Optional[str] = None,
+    config: Optional[OpenAIConfig] = None,
+):
+    """Create an `OpenAI` instance backed by the Azure OpenAI API.
+
+    `model_name` fills the `model` field of the requests and selects the
+    `tiktoken` tokenizer.
+    """
+    try:
+        import tiktoken
+        from openai import AsyncAzureOpenAI
+    except ImportError:
+        raise ImportError(
+            "The `openai` and `tiktoken` libraries need to be installed in order to use Outlines' Azure OpenAI integration."
+        )
+
+    if config is not None:
+        config = replace(config, model=model_name)  # type: ignore
+    else:
+        config = OpenAIConfig(model=model_name)
+
+    client = AsyncAzureOpenAI(
+        azure_endpoint=azure_endpoint, api_version=api_version, api_key=api_key
+    )
+    tokenizer = tiktoken.encoding_for_model(model_name)
+
+    return OpenAI(client, config, tokenizer)
diff --git a/outlines/models/openai_compatible.py b/outlines/models/openai_compatible.py
deleted file mode 100644
index 10547bc24..000000000
--- a/outlines/models/openai_compatible.py
+++ /dev/null
@@ -1,155 +0,0 @@
-"""Integration with custom OpenAI compatible APIs."""
-import functools
-import os
-from dataclasses import replace
-from typing import List, Optional, Union
-
-import numpy as np
-
-from outlines.models.openai import OpenAI, OpenAIConfig, generate_chat
-
-__all__ = ["OpenAICompatibleAPI", "openai_compatible_api"]
-
-
-class OpenAICompatibleAPI(OpenAI):
-    """An object that represents an OpenAI-compatible API."""
-
-    def __init__(
-        self,
-        model_name: str,
-        api_key: Optional[str] = None,
-        base_url: Optional[str] = None,
-        max_retries: int = 6,
-        timeout: Optional[float] = None,
-        system_prompt: Optional[str] = None,
-        config: Optional[OpenAIConfig] = None,
-        encoding="gpt-4",  # Default for tiktoken, should USUALLY work
-    ):
-        """Create an `OpenAI` instance.
-
-        Parameters
-        ----------
-        model_name
-            Model to use, as defined in OpenAI's documentation
-        api_key
-            Secret key to use with the OpenAI compatible API. One can also set the
-            `INFERENCE_API_KEY` environment variable, or the value of
-            `openai.api_key`.
-        base_url
-            Base URL to use for the API calls.
Required if a Custom OpenAI endpoint is used. - Can also be set with the `INFERENCE_BASE_URL` environment variable. - max_retries - The maximum number of retries when calls to the API fail. - timeout - Duration after which the request times out. - system_prompt - The content of the system message that precedes the user's prompt. - config - An instance of `OpenAIConfig`. Can be useful to specify some - parameters that cannot be set by calling this class' methods. - - """ - - try: - import openai - except ImportError: - raise ImportError( - "The `openai` library needs to be installed in order to use Outlines' OpenAI integration." - ) - - if api_key is None: - if os.getenv("INFERENCE_API_KEY") is not None: - api_key = os.getenv("INFERENCE_API_KEY") - elif openai.api_key is not None: - api_key = openai.api_key - else: - raise ValueError( - "You must specify an API key to use the Custom OpenAI API integration." - ) - - if base_url is None: - if os.getenv("INFERENCE_BASE_URL") is not None: - base_url = os.getenv("INFERENCE_BASE_URL") - else: - raise ValueError( - "You must specify a base URL to use the Custom OpenAI API integration." - ) - - if config is not None: - self.config = replace(config, model=model_name) # type: ignore - else: - self.config = OpenAIConfig(model=model_name) - - # This is necesssary because of an issue with the OpenAI API. - # Status updates: https://github.com/openai/openai-python/issues/769 - self.create_client = functools.partial( - openai.AsyncOpenAI, - api_key=api_key, - base_url=base_url, - max_retries=max_retries, - timeout=timeout, - ) - - self.system_prompt = system_prompt - - # We count the total number of prompt and generated tokens as returned - # by the OpenAI API, summed over all the requests performed with this - # model instance. - self.prompt_tokens = 0 - self.completion_tokens = 0 - self.encoding = encoding - - def __call__( - self, - prompt: Union[str, List[str]], - max_tokens: Optional[int] = None, - stop_at: Optional[Union[List[str], str]] = None, - *, - temperature: float = 1.0, - samples: Optional[int] = None, - ) -> np.ndarray: - """Call the OpenAI compatible API to generate text. - - Parameters - ---------- - prompt - A string or list of strings that will be used to prompt the model - max_tokens - The maximum number of tokens to generate - temperature - The value of the temperature used to sample tokens - samples - The number of completions to generate for each prompt - stop_at - Up to 4 words where the API will stop the completion. - - """ - if samples is None: - samples = self.config.n - - config = replace(self.config, max_tokens=max_tokens, n=samples, stop=stop_at, temperature=temperature) # type: ignore - - # We assume it's using the chat completion API style as that's the most commonly supported - client = self.create_client() - response, prompt_tokens, completion_tokens = generate_chat( - prompt, self.system_prompt, client, config - ) - self.prompt_tokens += prompt_tokens - self.completion_tokens += completion_tokens - - return response - - @property - def tokenizer(self): - """Defaults to gpt4, as that seems to work with most custom endpoints. 
Can be overridden if required in the constructor"""
-        try:
-            import tiktoken
-        except ImportError:
-            raise ImportError(
-                "The `tiktoken` library needs to be installed in order to choose `outlines.models.openai` with `is_in`"
-            )
-
-        return tiktoken.encoding_for_model(self.encoding)
-
-
-openai_compatible_api = OpenAICompatibleAPI
diff --git a/pyproject.toml b/pyproject.toml
index d131e8453..62e37e752 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,8 +56,9 @@ test = [
     "beartype<0.16.0",
     "datasets",
     "responses",
-    "llama-cpp-python>=0.2.42",
-    "huggingface_hub"
+    "llama-cpp-python",
+    "huggingface_hub",
+    "openai>=1.0.0"
 ]
 serve = [
     "vllm>=0.3.0",
diff --git a/tests/models/test_openai.py b/tests/models/test_openai.py
index c2e885eb1..5ba1b1411 100644
--- a/tests/models/test_openai.py
+++ b/tests/models/test_openai.py
@@ -1,12 +1,68 @@
+import importlib
+from unittest import mock
+from unittest.mock import MagicMock
+
 import pytest
+from openai import AsyncOpenAI
 
 from outlines.models.openai import (
+    OpenAI,
+    OpenAIConfig,
     build_optimistic_mask,
     find_longest_intersection,
     find_response_choices_intersection,
 )
 
 
+def module_patch(path):
+    """Patch functions that have the same name as the module in which they're implemented."""
+    target = path
+    components = target.split(".")
+    for i in range(len(components), 0, -1):
+        try:
+            # attempt to import the module
+            imported = importlib.import_module(".".join(components[:i]))
+
+            # module was imported, let's use it in the patch
+            patch = mock.patch(path)
+            patch.getter = lambda: imported
+            patch.attribute = ".".join(components[i:])
+            return patch
+        except Exception:
+            continue
+
+    # did not find a module, just return the default mock
+    return mock.patch(path)
+
+
+def test_openai_call():
+    with module_patch("outlines.models.openai.generate_chat") as mocked_generate_chat:
+        mocked_generate_chat.return_value = ["foo"], 1, 2
+        async_client = MagicMock(spec=AsyncOpenAI, api_key="key")
+
+        model = OpenAI(
+            async_client,
+            OpenAIConfig(model="gpt-4", max_tokens=10, temperature=0.5, n=2, stop=["."]),
+        )
+
+        assert model("bar")[0] == "foo"
+        assert model.prompt_tokens == 1
+        assert model.completion_tokens == 2
+        mocked_generate_chat_args = mocked_generate_chat.call_args
+        mocked_generate_chat_arg_config = mocked_generate_chat_args[0][3]
+        assert isinstance(mocked_generate_chat_arg_config, OpenAIConfig)
+        assert mocked_generate_chat_arg_config.max_tokens == 10
+        assert mocked_generate_chat_arg_config.temperature == 0.5
+        assert mocked_generate_chat_arg_config.n == 2
+        assert mocked_generate_chat_arg_config.stop == ["."]
+
+        model("bar", samples=3)
+        mocked_generate_chat_args = mocked_generate_chat.call_args
+        mocked_generate_chat_arg_config = mocked_generate_chat_args[0][3]
+        assert mocked_generate_chat_arg_config.n == 3
+
+
 @pytest.mark.parametrize(
     "response,choice,expected_intersection,expected_choices_left",
     (