Skip to content

Commit

Permalink
Merge pull request #89 from ai-cfia/80-fix-package-setup-referencing-…
Browse files Browse the repository at this point in the history
…louis

issue #80: remove louis reference from `pyproject.toml`
  • Loading branch information
k-allagbe authored Apr 2, 2024
2 parents fd08308 + 2255857 commit 627a7ec
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 79 deletions.
5 changes: 0 additions & 5 deletions ailab/db/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Database functions for the ailab project."""
import hashlib
import logging
import os
import urllib

Expand All @@ -11,9 +10,6 @@
import dotenv
dotenv.load_dotenv()

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

class DBError(Exception):
    """Exception type defined by the ailab database package."""

Expand All @@ -33,7 +29,6 @@ def raise_error(message):

def connect_db():
"""Connect to the postgresql database and return the connection."""
logger.info(f"Connecting to {LOUIS_DSN}")
connection = psycopg.connect(
conninfo=LOUIS_DSN,
row_factory=dict_row,
Expand Down
2 changes: 1 addition & 1 deletion ailab/db/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def search(cursor, query_embedding):
%(match_count)s, %(weights)s::JSONB)
""", data)
# turn into list of dict now to preserve dictionaries
return [dict(r) for r in cursor.fetchall()]
return [dict(r) for r in cursor.fetchall()[0]["search"]]

def search_from_text_query(cursor, query):
"""Encode the query before doing the search and return a dict."""
Expand Down
40 changes: 21 additions & 19 deletions ailab/models/openai.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,61 @@
"""Fetch embeddings from the Microsoft Azure OpenAI API."""

import os
import openai

import tiktoken
from dotenv import load_dotenv
from openai import AzureOpenAI

import dotenv
dotenv.load_dotenv()
load_dotenv()

# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/embeddings?tabs=python


def safe_get(key):
    """Return the value of environment variable ``key``.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's value.

    Raises:
        Exception: If the variable is missing or set to an empty string.
    """
    found = os.environ.get(key)
    if found:
        return found
    raise Exception(f"Environment variable {key} not defined")


# Required Azure OpenAI settings, read once at import time; safe_get raises
# if either variable is unset or empty.
OPENAI_API_KEY = safe_get("OPENAI_API_KEY")
OPENAI_ENDPOINT = safe_get("OPENAI_ENDPOINT")

# Module-level configuration of the `openai` package for Azure.
openai.api_type = "azure"
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_ENDPOINT
openai.api_version = "2023-05-15" # NOTE(review): confirm this is the intended API version

# Client object used by fetch_embedding and get_chat_answer below.
client = AzureOpenAI(
api_key=OPENAI_API_KEY, azure_endpoint=OPENAI_ENDPOINT, api_version="2023-05-15"
)
# Tokenizer encoding used by get_tokens_from_text.
enc = tiktoken.get_encoding("cl100k_base")


def fetch_embedding(tokens):
    """Fetch the embedding for ``tokens`` from the Microsoft Azure OpenAI API.

    Args:
        tokens: Input forwarded unchanged as ``input`` to the embeddings
            endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response data.

    Raises:
        Exception: If the OPENAI_API_ENGINE environment variable is not
            defined (raised by ``safe_get``).
    """
    # Looked up on every call, so the variable only needs to be set by the
    # time an embedding is actually requested.
    OPENAI_API_ENGINE = safe_get("OPENAI_API_ENGINE")

    # Only the client-based call is kept: the module-level
    # `openai.Embedding.create` API is not available in openai>=1.0.
    response = client.embeddings.create(input=tokens, model=OPENAI_API_ENGINE)
    return response.data[0].embedding


# def fetch_tokens_embeddings(text):
# tokens = get_tokens_from_text(text)
# embeddings = fetch_embedding(tokens)
# return (tokens, embeddings)


def get_tokens_from_text(text):
    """Encode ``text`` with the module-level ``enc`` encoding and return the tokens."""
    return enc.encode(text)


def get_chat_answer(system_prompt, user_prompt, max_token):
    """Request a chat completion from the Azure OpenAI API.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        max_token: Value passed as ``max_tokens`` to the completion request.

    Returns:
        The response object returned by ``client.chat.completions.create``,
        unmodified.

    Raises:
        Exception: If the OPENAI_API_ENGINE environment variable is not
            defined (raised by ``safe_get``).
    """
    OPENAI_API_ENGINE = safe_get("OPENAI_API_ENGINE")

    # The 1.x client selects the deployment through `model`; the pre-1.0
    # `engine` keyword is not accepted by chat.completions.create and would
    # raise a TypeError.
    response = client.chat.completions.create(
        model=OPENAI_API_ENGINE,
        temperature=0,
        max_tokens=max_token,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ classifiers = [
dynamic = ["dependencies"]

[tool.setuptools]
packages = ["louis", "ailab.db", "louis.models"]
packages = ["ailab", "ailab.db", "ailab.models", "ailab.db.api", "ailab.db.crawler"]

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
Expand Down
72 changes: 36 additions & 36 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,36 +1,36 @@
aiohttp==3.9.0
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2023.7.22
charset-normalizer==3.2.0
et-xmlfile==1.1.0
frozenlist==1.4.0
idna==3.4
iniconfig==2.0.0
multidict==6.0.4
numpy==1.25.1
openai==0.27.8
openpyxl==3.1.2
packaging==23.1
pandas==2.0.3
pandas-stubs==2.0.2.230605
pgvector==0.2.0
pluggy==1.2.0
psycopg==3.1.9
pytest==7.4.0
python-dateutil==2.8.2
python-dotenv==1.0.0
pytz==2023.3
regex==2023.6.3
requests==2.31.0
six==1.16.0
tiktoken==0.4.0
tqdm==4.65.0
types-pytz==2023.3.0.0
typing_extensions==4.7.1
tzdata==2023.3
urllib3==2.0.7
yarl==1.9.2
microbench==0.8
semver==3.0.2
aiohttp>=3.9.0
aiosignal>=1.3.1
async-timeout>=4.0.2
attrs>=23.1.0
certifi>=2023.7.22
charset-normalizer>=3.2.0
et-xmlfile>=1.1.0
frozenlist>=1.4.0
idna>=3.4
iniconfig>=2.0.0
multidict>=6.0.4
numpy>=1.25.1
openai>=1.14.3
openpyxl>=3.1.2
packaging>=23.1
pandas>=2.0.3
pandas-stubs>=2.0.2.230605
pgvector>=0.2.0
pluggy>=1.2.0
psycopg>=3.1.9
pytest>=7.4.0
python-dateutil>=2.8.2
python-dotenv>=1.0.0
pytz>=2023.3
regex>=2023.6.3
requests>=2.31.0
six>=1.16.0
tiktoken>=0.4.0
tqdm>=4.65.0
types-pytz>=2023.3.0.0
typing_extensions>=4.7.1
tzdata>=2023.3
urllib3>=2.0.7
yarl>=1.9.2
microbench>=0.8
semver>=3.0.2
36 changes: 19 additions & 17 deletions tests/fetch_embedding.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@

import openai
import os
import os
import sys
import dotenv
dotenv.load_dotenv()

openai.api_type = "azure"
openai.api_key = os.environ["OPENAI_API_KEY"]
openai.api_base = f"https://{os.environ['AZURE_OPENAI_SERVICE']}.openai.azure.com"
openai.api_version = "2023-05-15"
from dotenv import load_dotenv
from openai import AzureOpenAI

load_dotenv()


# Azure OpenAI client used by fetch_embedding below. The API key is read from
# OPENAI_API_KEY and the endpoint is built from AZURE_OPENAI_SERVICE; a missing
# variable raises KeyError at import time.
client = AzureOpenAI(
api_key=os.environ["OPENAI_API_KEY"],
azure_endpoint=f"https://{os.environ['AZURE_OPENAI_SERVICE']}.openai.azure.com",
api_version="2023-05-15",
)


def fetch_embedding(text):
    """Fetch the embedding for ``text`` from the Microsoft Azure OpenAI API.

    Args:
        text: Input forwarded unchanged as ``input`` to the embeddings
            endpoint.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    # Only the client-based call is kept: the module-level
    # `openai.Embedding.create` API is not available in openai>=1.0.
    response = client.embeddings.create(input=text, model="ada")
    return response.data[0].embedding

if __name__ == "__main__":
    # Everything after the script name is joined into the single string to embed.
    text = " ".join(sys.argv[1:])
    if not text:
        print("Please provide a text to embed")
        raise SystemExit
    print(fetch_embedding(text))

0 comments on commit 627a7ec

Please sign in to comment.