Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions server/embedding_generation/local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from embedding_generation.models import EmbeddingGenerator, EmbeddingModel
from sentence_transformers import SentenceTransformer


class SentenceTransformerEmbedding(EmbeddingGenerator):
    """Generate embeddings locally with a sentence-transformers model.

    Satisfies the ``EmbeddingGenerator`` protocol: exposes ``dimensions``
    and ``encode``. Instantiating it downloads/loads the model weights,
    so construction can be slow on first use.
    """

    def __init__(self, model: EmbeddingModel):
        # model.name is a Hugging Face model id, e.g.
        # "sentence-transformers/all-MiniLM-L6-v2" (see ALL_MINILM_L6_V2).
        self.model_instance = SentenceTransformer(model.name)
        # Vector width taken from the model descriptor, not probed from
        # the loaded model — the two must agree.
        self.dimensions = model.dimensions

    def encode(self, text_segments: list[str]) -> list[list[float]]:
        # NOTE(review): SentenceTransformer.encode returns a numpy ndarray,
        # not list[list[float]] as annotated; the Qdrant caller relies on
        # that (it calls .tolist() on each row), so converting here would
        # break it — confirm before changing either side.
        return self.model_instance.encode(text_segments)
18 changes: 18 additions & 0 deletions server/embedding_generation/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from collections import namedtuple
from typing import Protocol

EmbeddingModel = namedtuple('EmbeddingModel', 'name dimensions')


class EmbeddingGenerator(Protocol):
    """Structural (duck-typed) interface for embedding backends.

    Any implementation must expose the embedding vector width
    (``dimensions``) and an ``encode`` method mapping a batch of text
    segments to one embedding vector per segment.
    """

    # Width of each vector produced by encode(); consumers use it to size
    # their vector-store collections.
    dimensions: int

    def __init__(self, model: EmbeddingModel) -> None:
        """Configure the generator for the given model descriptor."""
        ...

    def encode(self, text_segments: list[str]) -> list[list[float]]:
        """Return one embedding vector per input text segment."""
        ...

ALL_MINILM_L6_V2 = EmbeddingModel("sentence-transformers/all-MiniLM-L6-v2", 384)


17 changes: 17 additions & 0 deletions server/embedding_generation/services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from embedding_generation.models import EmbeddingGenerator, EmbeddingModel

import requests


class HuggingfaceInferenceEndpoingEmbedding(EmbeddingGenerator):
    """Generate embeddings remotely via the Hugging Face Inference API.

    Satisfies the ``EmbeddingGenerator`` protocol (``dimensions`` +
    ``encode``) without loading any model weights locally.
    """

    # Seconds to wait for the Inference API before giving up. Generous
    # because "wait_for_model": True allows slow cold starts.
    REQUEST_TIMEOUT = 60

    def __init__(self, model: EmbeddingModel, hf_token):
        # Feature-extraction pipeline endpoint for the chosen model.
        self.url = f'https://api-inference.huggingface.co/pipeline/feature-extraction/{model.name}'
        self.headers = {"Authorization": f"Bearer {hf_token}"}
        self.dimensions = model.dimensions

    def query(self, text_segments):
        """POST the segments to the Inference API and return parsed JSON.

        Raises:
            requests.HTTPError: on a non-2xx response. Previously the
                error payload (e.g. {"error": ...}) was returned as if it
                were embeddings, which only failed later in the caller.
            requests.Timeout: if the API does not answer within
                REQUEST_TIMEOUT seconds (previously this could hang
                forever — requests has no default timeout).
        """
        response = requests.post(
            self.url,
            headers=self.headers,
            json={"inputs": text_segments, "options": {"wait_for_model": True}},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def encode(self, text_segments: list[str]) -> list[list[float]]:
        """Return one embedding vector (list of floats) per segment."""
        return self.query(text_segments)
9 changes: 1 addition & 8 deletions server/llm/llm.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
from models.models import Document as PsychicDocument, LLM
from typing import List, Any, Optional
import uuid
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from typing import List, Optional
from langchain.llms import GPT4All, LlamaCpp
import os
import requests
import json
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
import openai

embeddings = HuggingFaceEmbeddings(
model_name=os.environ.get("embeddings_model") or "all-MiniLM-L6-v2"
)
embeddings_dimension = 384
base_url = os.environ.get("LLM_URL") or "http://localhost"


Expand Down
2 changes: 1 addition & 1 deletion server/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ async def query(self, query: str) -> List[Document]:

class LLM(BaseModel, ABC):
    @abstractmethod
    def ask(self, documents: List[str], question: str) -> str:
        """Answer ``question`` using the given document texts as context.

        Returns the answer as a single string (signature was previously
        annotated List[List[float]], which described embeddings, not an
        LLM answer).
        """
        pass


Expand Down
1,635 changes: 788 additions & 847 deletions server/poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ pymupdf = "^1.22.5"
openai = "^0.27.8"
supabase = "^1.0.3"
poetry-dotenv-plugin = "^0.2.0"
torch = [
{ platform = "darwin", version="^2.0.1" },
{ platform = "linux", url = "http://download.pytorch.org/whl/cpu/torch-2.0.1%2Bcpu-cp310-cp310-linux_x86_64.whl", markers = "platform_machine == 'x86_64'" }
]
weaviate-client = "^3.23.0"

[tool.poetry.scripts]
start = "server.main:start"
Expand Down
36 changes: 26 additions & 10 deletions server/server/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import os
from embedding_generation.local import SentenceTransformerEmbedding
from embedding_generation.models import ALL_MINILM_L6_V2, EmbeddingModel
from embedding_generation.services import HuggingfaceInferenceEndpoingEmbedding
import uvicorn
from fastapi import FastAPI, File, HTTPException, Depends, Body, UploadFile
from typing import List
Expand Down Expand Up @@ -35,7 +38,17 @@
)

bearer_scheme = HTTPBearer()
vector_store = WeaviateVectorStore() if os.environ.get('USE_WEAVIATE_VECTORSTORE') == 'true' else QdrantVectorStore()
embedding_model_override = EmbeddingModel(os.environ.get("embeddings_model"), os.environ.get("embeddings_size"))

embedding_model = embedding_model_override if embedding_model_override.name else ALL_MINILM_L6_V2

if os.environ.get("embeddings_local"):
embedding_generator = SentenceTransformerEmbedding(embedding_model)
else:
embedding_generator = HuggingfaceInferenceEndpoingEmbedding(embedding_model, os.environ.get("hf_token"))

# TODO: Weaviate module needs to be refactored to use the embedding abstraction as well
vector_store = WeaviateVectorStore() if os.environ.get('USE_WEAVIATE_VECTORSTORE') == 'true' else QdrantVectorStore(embedding_generator)
llm = get_selected_llm()
db = Database()

Expand Down Expand Up @@ -89,16 +102,19 @@ async def get_previews(

previews: List[FilePreview] = []
for upload_file in upload_files:
pdf_document = fitz.open(upload_file.filename, upload_file.file)
page = pdf_document.load_page(0)
image = page.get_pixmap()
image_bytes = image.tobytes()
image_base64 = base64.b64encode(image_bytes).decode("utf-8")
previews.append(
FilePreview(
file_name=upload_file.filename, file_preview_img=image_base64
try:
pdf_document = fitz.open(upload_file.filename, upload_file.file)
page = pdf_document.load_page(0)
image = page.get_pixmap()
image_bytes = image.tobytes()
image_base64 = base64.b64encode(image_bytes).decode("utf-8")
previews.append(
FilePreview(
file_name=upload_file.filename, file_preview_img=image_base64
)
)
)
except Exception as e:
print(f'Issue creating preview for file {upload_file.filename}\nError: {e}')
return GetPreviewsResponse(previews=previews)
except Exception as e:
print(e)
Expand Down
17 changes: 6 additions & 11 deletions server/vectorstore/qdrant_vectorstore.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,24 @@
import os
from embedding_generation.models import EmbeddingGenerator
from models.models import Document as PsychicDocument, VectorStore, AppConfig
from typing import List, Any, Optional
import uuid
from qdrant_client import QdrantClient
from langchain.docstore.document import Document
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from qdrant_client.models import PointStruct, Distance, VectorParams, ScoredPoint
from qdrant_client.http import models as rest


embeddings_model = SentenceTransformer(
os.environ.get("embeddings_model") or "all-MiniLM-L6-v2"
)
embeddings_dimension = 384


class QdrantVectorStore(VectorStore):
client: Optional[QdrantClient] = None
collection_name: Optional[str] = None

class Config:
arbitrary_types_allowed = True

def __init__(self):
def __init__(self, embeddings_model: EmbeddingGenerator):
# self.client = Qdrant.from_documents(
# [],
# embeddings,
Expand All @@ -33,6 +27,7 @@ def __init__(self):
# )

super().__init__()
self.embeddings_model = embeddings_model
if not os.getenv("QDRANT_URL"):
raise Exception("QDRANT_URL must be set as an environment variable.")
qdrant_port = os.getenv("QDRANT_PORT") or "6333"
Expand All @@ -46,7 +41,7 @@ def __init__(self):
self.client.create_collection(
collection_name="my_documents",
vectors_config=VectorParams(
size=embeddings_dimension, distance=Distance.COSINE
size=self.embeddings_model.dimensions, distance=Distance.COSINE
),
)
self.collection_name = "my_documents"
Expand Down Expand Up @@ -83,7 +78,7 @@ async def upsert(
)

# TODO: Fix this so that the vector output is of the format PointStruct expects
vector = embeddings_model.encode([doc.page_content])[0]
vector = self.embeddings_model.encode([doc.page_content])[0]
vector = vector.tolist()

points.append(
Expand All @@ -107,7 +102,7 @@ async def upsert(
return True

async def query(self, query: str, app_config: AppConfig) -> List[PsychicDocument]:
query_vector = embeddings_model.encode([query])[0]
query_vector = self.embeddings_model.encode([query])[0]
query_vector = query_vector.tolist()
results = self.client.search(
collection_name=self.collection_name,
Expand Down