diff --git a/ailab/models/openai.py b/ailab/models/openai.py
index 553f19c..7a8b858 100644
--- a/ailab/models/openai.py
+++ b/ailab/models/openai.py
@@ -1,59 +1,62 @@
 """"Fetch embeddings from the Microsoft Azure OpenAI API"""
+
 import os
-import openai
+
 import tiktoken
+from dotenv import load_dotenv
+from openai import AzureOpenAI
 
-import dotenv
-dotenv.load_dotenv()
+load_dotenv()
 
 # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/embeddings?tabs=python
 
+
 def safe_get(key):
     value = os.environ.get(key)
     if not value:
         raise Exception(f"Environment variable {key} not defined")
     return value
 
+
 OPENAI_API_KEY = safe_get("OPENAI_API_KEY")
 OPENAI_ENDPOINT = safe_get("OPENAI_ENDPOINT")
 
-openai.api_type = "azure"
-openai.api_key = OPENAI_API_KEY
-openai.api_base = OPENAI_ENDPOINT
-openai.api_version = "2023-05-15" # be sure it's the good one
-
+client = AzureOpenAI(
+    api_key=OPENAI_API_KEY, azure_endpoint=OPENAI_ENDPOINT, api_version="2023-05-15"
+)
 
 enc = tiktoken.get_encoding("cl100k_base")
 
+
 def fetch_embedding(tokens):
     """Fetch embedding for a list of tokens from the Microsoft Azure OpenAI API"""
     OPENAI_API_ENGINE = safe_get("OPENAI_API_ENGINE")
-    
-    response = openai.Embedding.create(
-        input=tokens,
-        engine=OPENAI_API_ENGINE
-    )
-    embeddings = response['data'][0]['embedding']
+
+    response = client.embeddings.create(input=tokens, model=OPENAI_API_ENGINE)
+    embeddings = response.data[0].embedding
     return embeddings
 
+
 # def fetch_tokens_embeddings(text):
 #     tokens = get_tokens_from_text(text)
 #     embeddings = fetch_embedding(tokens)
 #     return (tokens, embeddings)
 
+
 def get_tokens_from_text(text):
     tokens = enc.encode(text)
     return tokens
 
+
 def get_chat_answer(system_prompt, user_prompt, max_token):
     OPENAI_API_ENGINE = safe_get("OPENAI_API_ENGINE")
-    response = openai.ChatCompletion.create(
-        engine=OPENAI_API_ENGINE,
+    response = client.chat.completions.create(
+        model=OPENAI_API_ENGINE,
         temperature=0,
         max_tokens=max_token,
-        messages = [
+        messages=[
             {"role": "system", "content": system_prompt},
-            {"role": "user", "content": (user_prompt)}
-        ]
+            {"role": "user", "content": (user_prompt)},
+        ],
     )
     return response
diff --git a/requirements.txt b/requirements.txt
index 54037d9..37cc1d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,7 +10,7 @@ idna>=3.4
 iniconfig>=2.0.0
 multidict>=6.0.4
 numpy>=1.25.1
-openai>=0.27.8
+openai>=1.14.3
 openpyxl>=3.1.2
 packaging>=23.1
 pandas>=2.0.3
diff --git a/tests/fetch_embedding.py b/tests/fetch_embedding.py
index 90189e8..1062a01 100644
--- a/tests/fetch_embedding.py
+++ b/tests/fetch_embedding.py
@@ -1,27 +1,29 @@
-
-import openai
-import os
+import os
 import sys
-import dotenv
-dotenv.load_dotenv()
 
-openai.api_type = "azure"
-openai.api_key = os.environ["OPENAI_API_KEY"]
-openai.api_base = f"https://{os.environ['AZURE_OPENAI_SERVICE']}.openai.azure.com"
-openai.api_version = "2023-05-15"
+from dotenv import load_dotenv
+from openai import AzureOpenAI
+
+load_dotenv()
+
+
+client = AzureOpenAI(
+    api_key=os.environ["OPENAI_API_KEY"],
+    azure_endpoint=f"https://{os.environ['AZURE_OPENAI_SERVICE']}.openai.azure.com",
+    api_version="2023-05-15",
+)
+
 
 def fetch_embedding(text):
     """Fetch embedding for a list of tokens from the Microsoft Azure OpenAI API"""
-    response = openai.Embedding.create(
-        input=text,
-        engine="ada"
-    )
-    embeddings = response['data'][0]['embedding']
+    response = client.embeddings.create(input=text, model="ada")
+    embeddings = response.data[0].embedding
     return embeddings
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     text = " ".join(sys.argv[1:])
     if len(text) == 0:
-        print('Please provide a text to embed')
+        print("Please provide a text to embed")
         raise SystemExit
-    print(fetch_embedding(text))
\ No newline at end of file
+    print(fetch_embedding(text))