Commit: v2.0.22
ashpreetbedi committed Nov 1, 2023
1 parent 186ddf3 commit b86d7cb
Showing 10 changed files with 69 additions and 30 deletions.
8 changes: 4 additions & 4 deletions phi/api/conversation.py
@@ -20,7 +20,7 @@ def create_conversation_monitor(monitor: ConversationMonitorCreate) -> bool:
if not phi_cli_settings.api_enabled:
return True

logger.debug("--o-o-- Creating Conversation Monitor")
# logger.debug("--o-o-- Creating Conversation Monitor")
with api.AuthenticatedClient() as api_client:
try:
conversation_workspace = ConversationWorkspace(
@@ -42,7 +42,7 @@ def create_conversation_monitor(monitor: ConversationMonitorCreate) -> bool:
if response_json is None:
return False

logger.debug(f"Response: {response_json}")
# logger.debug(f"Response: {response_json}")
return True
except Exception as e:
logger.debug(f"Could not create conversation monitor: {e}")
@@ -53,7 +53,7 @@ def create_conversation_event(conversation: ConversationEventCreate) -> bool:
if not phi_cli_settings.api_enabled:
return True

logger.debug("--o-o-- Creating Conversation Event")
# logger.debug("--o-o-- Creating Conversation Event")
with api.AuthenticatedClient() as api_client:
try:
conversation_workspace = ConversationWorkspace(
@@ -75,7 +75,7 @@ def create_conversation_event(conversation: ConversationEventCreate) -> bool:
if response_json is None:
return False

logger.debug(f"Response: {response_json}")
# logger.debug(f"Response: {response_json}")
return True
except Exception as e:
logger.debug(f"Could not log conversation event: {e}")
1 change: 1 addition & 0 deletions phi/conversation/conversation.py
@@ -477,6 +477,7 @@ def get_user_prompt(
_user_prompt += "Respond to the following message"
if self.user_type:
_user_prompt += f" from a '{self.user_type}'"
_user_prompt += ":"
_user_prompt += f"\nUSER: {message}"
_user_prompt += "\nASSISTANT: "

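
For reference, with the added colon the tail of the assembled user prompt now reads as follows (a sketch; the user_type value and the message are hypothetical):

    Respond to the following message from a 'customer':
    USER: How do I load a knowledge base?
    ASSISTANT: 
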
25 changes: 15 additions & 10 deletions phi/document/reader/pdf.py
@@ -1,5 +1,5 @@
from pathlib import Path
- from typing import List
+ from typing import List, Union, IO, Any

from phi.document.base import Document
from phi.document.reader.base import Reader
@@ -9,21 +9,26 @@
class PDFReader(Reader):
"""Reader for PDF files"""

- def read(self, path: Path) -> List[Document]:
- if not path:
- raise ValueError("No path provided")
-
- if not path.exists():
- raise FileNotFoundError(f"Could not find file: {path}")
+ def read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
+ if not pdf:
+ raise ValueError("No pdf provided")

try:
from pypdf import PdfReader as DocumentReader # noqa: F401
except ImportError:
raise ImportError("`pypdf` not installed")

logger.info(f"Reading: {path}")
doc_name = path.name.split(".")[0]
doc_reader = DocumentReader(path)
doc_name = ""
try:
if isinstance(pdf, str):
doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
else:
doc_name = pdf.name.split(".")[0]
except Exception:
doc_name = "pdf"

logger.info(f"Reading: {doc_name}")
doc_reader = DocumentReader(pdf)

documents = [
Document(
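
PDFReader.read() now accepts a plain string path, a pathlib.Path, or an already-open file object instead of only a Path. A minimal usage sketch (the file path is a placeholder and pypdf must be installed):

    from pathlib import Path
    from phi.document.reader.pdf import PDFReader

    reader = PDFReader()

    # Pass a Path or a plain string path
    documents = reader.read(pdf=Path("data/recipes.pdf"))

    # Or pass an open binary file object
    with open("data/recipes.pdf", "rb") as f:
        documents = reader.read(pdf=f)

    print(f"Read {len(documents)} documents")
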
11 changes: 7 additions & 4 deletions phi/embedder/openai.py
@@ -16,7 +16,7 @@ class OpenAIEmbedder(Embedder):

def _response(self, text: str):
if get_from_env("OPENAI_API_KEY") is None:
logger.debug("--o-o-- Using Phidata Servers")
logger.debug("--o-o-- Using Phidata Proxy")
try:
from phi.api.llm import openai_embedding

@@ -36,11 +36,14 @@ def _response(self, text: str):

def get_embedding(self, text: str) -> List[float]:
response = self._response(text=text)
if "data" not in response:
try:
if "data" not in response:
return []
return response["data"][0]["embedding"]
except Exception as e:
logger.warning(e)
return []

return response["data"][0]["embedding"]

def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
response = self._response(text=text)
if "data" not in response:
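
With the new try/except, get_embedding() now returns an empty list instead of raising when the response is malformed. A short sketch (assumes OPENAI_API_KEY is set, or that the Phidata proxy is reachable when it is not):

    from phi.embedder.openai import OpenAIEmbedder

    embedder = OpenAIEmbedder()
    embedding = embedder.get_embedding("The quick brown fox jumps over the lazy dog")
    if embedding:
        print(f"Got a vector of length {len(embedding)}")
    else:
        print("Embedding failed, received an empty list")
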
36 changes: 32 additions & 4 deletions phi/knowledge/base.py
@@ -41,10 +41,7 @@ def search(self, query: str, num_documents: Optional[int] = None) -> List[Docume
return self.vector_db.search(query=query, limit=_num_documents)

def load(self, recreate: bool = False) -> None:
"""Load the knowledge base to the vector db
TODO: Use upsert instead of insert
"""
"""Load the knowledge base to the vector db"""

if self.vector_db is None:
logger.warning("No vector db provided")
@@ -73,6 +70,37 @@ def load(self, recreate: bool = False) -> None:
logger.debug("Optimizing Vector DB")
self.vector_db.optimize()

+ def load_documents(self, documents: List[List[Document]], recreate: bool = False) -> None:
+ """Load documents to the knowledge base
+ Args:
+ documents (List[List[Document]]): List of list of documents to load
+ recreate (bool, optional): Whether to recreate the documents. Defaults to False.
+ """
+
+ if self.vector_db is None:
+ logger.warning("No vector db provided")
+ return
+
+ logger.debug("Creating collection")
+ self.vector_db.create()
+
+ logger.info("Loading knowledge base")
+ num_documents = 0
+
+ for document_list in documents:
+ # Filter out documents which already exist in the vector db
+ if not recreate:
+ document_list = [document for document in document_list if not self.vector_db.doc_exists(document)]
+
+ self.vector_db.insert(documents=document_list)
+ num_documents += len(document_list)
+ logger.info(f"Loaded {num_documents} documents to knowledge base")
+
+ if self.optimize_on is not None and num_documents > self.optimize_on:
+ logger.debug("Optimizing Vector DB")
+ self.vector_db.optimize()

def exists(self) -> bool:
"""Returns True if the knowledge base exists"""
if self.vector_db is None:
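
The new load_documents() loads pre-built batches of documents instead of reading them from the knowledge base's own sources. A hedged sketch; the PgVector import, its parameters, and the connection string are placeholders assumed for illustration, not part of this commit:

    from phi.document import Document
    from phi.knowledge.base import KnowledgeBase
    from phi.vectordb.pgvector import PgVector  # assumed vector db implementation

    knowledge_base = KnowledgeBase(
        vector_db=PgVector(collection="notes", db_url="postgresql+psycopg://ai:ai@localhost:5532/ai"),
    )

    batches = [
        [Document(content="Phidata is an AI toolkit for engineers.")],
        [Document(content="A knowledge base stores documents in a vector db.")],
    ]
    knowledge_base.load_documents(batches, recreate=False)
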
5 changes: 3 additions & 2 deletions phi/knowledge/combined.py
@@ -2,6 +2,7 @@

from phi.document import Document
from phi.knowledge.base import KnowledgeBase
+ from phi.utils.log import logger


class CombinedKnowledgeBase(KnowledgeBase):
@@ -17,5 +18,5 @@ def document_lists(self) -> Iterator[List[Document]]:
"""

for kb in self.sources:
- for document_list in kb.document_lists:
- yield document_list
+ logger.debug(f"Loading knowledge base: {kb}")
+ yield from kb.document_lists
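
With yield from, each source's document batches are streamed through unchanged. A sketch of combining two sources; the module path for WebsiteKnowledgeBase and the field names path and urls are assumptions based on the surrounding classes, and the locations are placeholders:

    from phi.knowledge.combined import CombinedKnowledgeBase
    from phi.knowledge.pdf import PDFKnowledgeBase
    from phi.knowledge.website import WebsiteKnowledgeBase

    knowledge_base = CombinedKnowledgeBase(
        sources=[
            PDFKnowledgeBase(path="data/pdfs"),
            WebsiteKnowledgeBase(urls=["https://www.phidata.com"]),
        ],
    )

    # Iterate the combined batches that load() would insert into the vector db
    for batch in knowledge_base.document_lists:
        print(f"Batch of {len(batch)} documents")
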
4 changes: 2 additions & 2 deletions phi/knowledge/pdf.py
@@ -23,9 +23,9 @@ def document_lists(self) -> Iterator[List[Document]]:

if _pdf_path.exists() and _pdf_path.is_dir():
for _pdf in _pdf_path.glob("**/*.pdf"):
- yield self.reader.read(path=_pdf)
+ yield self.reader.read(pdf=_pdf)
elif _pdf_path.exists() and _pdf_path.is_file() and _pdf_path.suffix == ".pdf":
- yield self.reader.read(path=_pdf_path)
+ yield self.reader.read(pdf=_pdf_path)


class PDFUrlKnowledgeBase(KnowledgeBase):
2 changes: 1 addition & 1 deletion phi/llm/agent/website.py
@@ -19,7 +19,7 @@ def __init__(self, knowledge_base: Optional[WebsiteKnowledgeBase] = None):

def add_website_to_knowledge_base(self, url: str) -> str:
"""This function adds a websites content to the knowledge base.
- NOTE: The website must start wit http:// or https:// and should be a valid website.
+ NOTE: The website must start with https:// and should be a valid website.
USE THIS FUNCTION TO GET INFORMATION ABOUT PRODUCTS FROM THE INTERNET.
4 changes: 2 additions & 2 deletions phi/llm/openai.py
@@ -63,7 +63,7 @@ def api_kwargs(self) -> Dict[str, Any]:

def invoke_model(self, messages: List[Message]) -> OpenAIObject:
if get_from_env("OPENAI_API_KEY") is None:
logger.debug("--o-o-- Using Phidata Servers")
logger.debug("--o-o-- Using Phidata Proxy")
try:
from phi.api.llm import openai_chat

@@ -91,7 +91,7 @@ def invoke_model(self, messages: List[Message]) -> OpenAIObject:

def invoke_model_stream(self, messages: List[Message]) -> Iterator[OpenAIObject]:
if get_from_env("OPENAI_API_KEY") is None:
logger.debug("--o-o-- Using Phidata Servers")
logger.debug("--o-o-- Using Phidata Proxy")
try:
from phi.api.llm import openai_chat_stream
from openai import util as openai_util
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "phidata"
version = "2.0.21"
version = "2.0.22"
description = "AI Toolkit for Engineers"
requires-python = ">=3.7"
readme = "README.md"
@@ -91,6 +91,7 @@ module = [
"botocore.*",
"bs4.*",
"docker.*",
"duckdb.*",
"kubernetes.*",
"openai.*",
"pgvector.*",
