Commit

removing streamlit.

Marc Fabian Mezger committed May 16, 2024
1 parent 0c7d14c commit 478ba4b
Showing 9 changed files with 3,975 additions and 3,774 deletions.
7 changes: 7 additions & 0 deletions README.md
@@ -19,6 +19,7 @@ This is a Rest-Backend for a Conversational Agent, that allows to embedd Documen
- [Vector Database](#vector-database)
- [Qdrant API Key](#qdrant-api-key)
- [Bulk Ingestion](#bulk-ingestion)
- [Update all dependencies](#update-all-dependencies)
- [Star History](#star-history)


@@ -144,6 +145,12 @@ And you need to change it in the qdrant.yaml file in the config folder.
If you want to ingest a large amount of data, I would recommend using the scripts located in agent/ingestion.


## Update all dependencies

```
poetry add langchain@latest loguru@latest omegaconf@latest aleph-alpha-client@latest tiktoken@latest python-dotenv@latest httpx@latest python-multipart@latest fastapi@latest openai@latest grpcio@latest grpcio-tools@latest uvicorn@latest gpt4all@latest nltk@latest pypdfium2@latest lingua-language-detector@latest pyarrow@latest aiohttp@latest gitpython@latest jinja2@latest langchain-openai@latest
```
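Running `poetry add <package>@latest` for each dependency bumps its version constraint in `pyproject.toml` to the newest release and refreshes the lock file in one step, whereas a plain `poetry update` only re-resolves versions within the existing constraints.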


## Star History

4 changes: 2 additions & 2 deletions agent/api.py
@@ -27,15 +27,15 @@
    QAResponse,
    SearchResponse,
)
from agent.utils.utility import (
from agent.utils.vdb import (
    combine_text_from_list,
    create_tmp_folder,
    initialize_aleph_alpha_vector_db,
    initialize_gpt4all_vector_db,
    initialize_open_ai_vector_db,
    load_vec_db_conn,
    validate_token,
)
from agent.utils.vdb import load_vec_db_conn

# add file logger for loguru
# logger.add("logs/file_{time}.log", backtrace=False, diagnose=False)
92 changes: 0 additions & 92 deletions agent/utils/utility.py
@@ -3,15 +3,11 @@
from pathlib import Path

from langchain.prompts import PromptTemplate
from langchain_community.vectorstores.qdrant import Qdrant
from lingua import Language, LanguageDetectorBuilder
from loguru import logger
from qdrant_client import models
from qdrant_client.http.exceptions import UnexpectedResponse

from agent.data_model.internal_model import RetrievalResults
from agent.data_model.request_data_model import LLMProvider
from agent.utils.vdb import load_vec_db_conn

# add new languages to detect here
languages = [Language.ENGLISH, Language.GERMAN]
@@ -193,94 +189,6 @@ def create_tmp_folder() -> str:
    return str(tmp_dir)


def initialize_aleph_alpha_vector_db() -> None:
    """Initializes the Aleph Alpha vector db."""
    qdrant_client, cfg = load_vec_db_conn()
    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_aa)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_aa} already exists.")
    except UnexpectedResponse:
        generate_collection_aleph_alpha(qdrant_client, collection_name=cfg.qdrant.collection_name_aa, embeddings_size=cfg.aleph_alpha_embeddings.size)


def generate_collection_aleph_alpha(qdrant_client: Qdrant, collection_name: str, embeddings_size: int) -> None:
    """Generate a collection for the Aleph Alpha Backend.
    Args:
    ----
        qdrant_client (_type_): _description_
        collection_name (_type_): _description_
        embeddings_size (_type_): _description_
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")


def initialize_open_ai_vector_db() -> None:
    """Initializes the OpenAI vector db.
    Args:
    ----
        cfg (DictConfig): Configuration from the file
    """
    qdrant_client, cfg = load_vec_db_conn()

    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_openai)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_openai} already exists.")
    except UnexpectedResponse:
        generate_collection_openai(qdrant_client, collection_name=cfg.qdrant.collection_name_openai)


def generate_collection_openai(qdrant_client: Qdrant, collection_name: str) -> None:
    """Generate a collection for the OpenAI Backend.
    Args:
    ----
        qdrant_client (_type_): Qdrant Client Langchain.
        collection_name (_type_): Name of the Collection
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")


def initialize_gpt4all_vector_db() -> None:
    """Initializes the GPT4ALL vector db.
    Args:
    ----
        cfg (DictConfig): Configuration from the file
    """
    qdrant_client, cfg = load_vec_db_conn()

    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_gpt4all)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_gpt4all} already exists.")
    except UnexpectedResponse:
        generate_collection_gpt4all(qdrant_client, collection_name=cfg.qdrant.collection_name_gpt4all)


def generate_collection_gpt4all(qdrant_client: Qdrant, collection_name: str) -> None:
    """Generate a collection for the GPT4ALL Backend.
    Args:
    ----
        qdrant_client (Qdrant): Qdrant Client
        collection_name (str): Name of the Collection
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")


if __name__ == "__main__":
    # test the function
    generate_prompt("aleph_alpha_qa.j2", "This is a test text.", "What is the meaning of life?")
93 changes: 91 additions & 2 deletions agent/utils/vdb.py
@@ -1,11 +1,12 @@
"""Vector Database Utilities."""
import os

from langchain_community.vectorstores import Qdrant
from langchain_community.vectorstores.qdrant import Qdrant
from langchain_core.embeddings import Embeddings
from loguru import logger
from omegaconf import DictConfig
from qdrant_client import QdrantClient
from qdrant_client import QdrantClient, models
from qdrant_client.http.exceptions import UnexpectedResponse
from ultra_simple_config import load_config


@@ -36,3 +37,91 @@ def init_vdb(cfg: DictConfig, collection_name: str, embedding: Embeddings) -> Qd
def load_vec_db_conn(cfg: DictConfig) -> QdrantClient:
"""Load the Vector Database Connection."""
return QdrantClient(cfg.qdrant.url, port=cfg.qdrant.port, api_key=os.getenv("QDRANT_API_KEY"), prefer_grpc=cfg.qdrant.prefer_grpc), cfg


def initialize_aleph_alpha_vector_db() -> None:
    """Initializes the Aleph Alpha vector db."""
    qdrant_client, cfg = load_vec_db_conn()
    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_aa)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_aa} already exists.")
    except UnexpectedResponse:
        generate_collection_aleph_alpha(qdrant_client, collection_name=cfg.qdrant.collection_name_aa, embeddings_size=cfg.aleph_alpha_embeddings.size)


def generate_collection_aleph_alpha(qdrant_client: Qdrant, collection_name: str, embeddings_size: int) -> None:
    """Generate a collection for the Aleph Alpha Backend.
    Args:
    ----
        qdrant_client (Qdrant): Qdrant client.
        collection_name (str): Name of the collection.
        embeddings_size (int): Dimension of the embedding vectors.
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=embeddings_size, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")


def initialize_open_ai_vector_db() -> None:
    """Initializes the OpenAI vector db.
    Args:
    ----
        cfg (DictConfig): Configuration from the file
    """
    qdrant_client, cfg = load_vec_db_conn()

    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_openai)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_openai} already exists.")
    except UnexpectedResponse:
        generate_collection_openai(qdrant_client, collection_name=cfg.qdrant.collection_name_openai)


def generate_collection_openai(qdrant_client: Qdrant, collection_name: str) -> None:
    """Generate a collection for the OpenAI Backend.
    Args:
    ----
        qdrant_client (Qdrant): Qdrant Client Langchain.
        collection_name (str): Name of the Collection
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")


def initialize_gpt4all_vector_db() -> None:
    """Initializes the GPT4ALL vector db.
    Args:
    ----
        cfg (DictConfig): Configuration from the file
    """
    qdrant_client, cfg = load_vec_db_conn()

    try:
        qdrant_client.get_collection(collection_name=cfg.qdrant.collection_name_gpt4all)
        logger.info(f"SUCCESS: Collection {cfg.qdrant.collection_name_gpt4all} already exists.")
    except UnexpectedResponse:
        generate_collection_gpt4all(qdrant_client, collection_name=cfg.qdrant.collection_name_gpt4all)


def generate_collection_gpt4all(qdrant_client: Qdrant, collection_name: str) -> None:
    """Generate a collection for the GPT4ALL Backend.
    Args:
    ----
        qdrant_client (Qdrant): Qdrant Client
        collection_name (str): Name of the Collection
    """
    qdrant_client.recreate_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=384, distance=models.Distance.COSINE),
    )
    logger.info(f"SUCCESS: Collection {collection_name} created.")
6 changes: 2 additions & 4 deletions frontend/pages/1_upload_pdfs.py
@@ -1,9 +1,7 @@
"""The page to upload a pdf."""

# import streamlit as st
# from loguru import logger

# from agent.backend.aleph_alpha_service import embedd_documents_aleph_alpha
# from pathlib import Path


# def upload_files(save_path_input: str) -> list[tuple[str, bytes]]:
@@ -18,7 +16,7 @@
# return files


# def start_embedding(file_path: str, token: str) -> None:
# # def start_embedding(file_path: str, token: str) -> None:
# """Start the embedding process."""
# embedd_documents_aleph_alpha(dir=file_path, aleph_alpha_token=token)

2 changes: 2 additions & 0 deletions frontend/requirements.txt
@@ -0,0 +1,2 @@
streamlit
loguru