diff --git a/phi/assistant/assistant.py b/phi/assistant/assistant.py
index ba9672e1c..29ca74d59 100644
--- a/phi/assistant/assistant.py
+++ b/phi/assistant/assistant.py
@@ -150,7 +150,9 @@ def create(self) -> "Assistant":
             _file_ids = self.file_ids or []
             if self.files is not None:
                 for _file in self.files:
-                    _file_ids.append(_file.get_id())
+                    _file = _file.get_or_create()
+                    if _file.id is not None:
+                        _file_ids.append(_file.id)
             request_body["file_ids"] = _file_ids
         if self.metadata is not None:
             request_body["metadata"] = self.metadata
@@ -223,7 +225,13 @@ def update(self) -> "Assistant":
             _file_ids = self.file_ids or []
             if self.files is not None:
                 for _file in self.files:
-                    _file_ids.append(_file.get_id())
+                    try:
+                        _file = _file.get()
+                        if _file.id is not None:
+                            _file_ids.append(_file.id)
+                    except Exception as e:
+                        logger.warning(f"Unable to get file: {e}")
+                        continue
             request_body["file_ids"] = _file_ids
         if self.metadata:
             request_body["metadata"] = self.metadata
@@ -267,6 +275,7 @@ def to_dict(self) -> Dict[str, Any]:
                 "tools",
                 "file_ids",
                 "files",
+                "created_at",
             },
         )
 
diff --git a/phi/assistant/file/file.py b/phi/assistant/file/file.py
index 32271258c..ec949b938 100644
--- a/phi/assistant/file/file.py
+++ b/phi/assistant/file/file.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any, Optional, Dict
 from typing_extensions import Literal
 
 from pydantic import BaseModel, ConfigDict
@@ -31,6 +31,10 @@ class File(BaseModel):
     # Supported values are fine-tune, fine-tune-results, assistants, and assistants_output.
     purpose: Literal["fine-tune", "assistants"] = "assistants"
 
+    # The current status of the file, which can be either `uploaded`, `processed`, or `error`.
+    status: Optional[Literal["uploaded", "processed", "error"]] = None
+    status_details: Optional[str] = None
+
     # The Unix timestamp (in seconds) for when the file was created.
     created_at: Optional[int] = None
 
@@ -46,48 +50,121 @@ def client(self) -> OpenAI:
     def read(self) -> Any:
         raise NotImplementedError
 
+    def write(self, content: Any) -> Any:
+        raise NotImplementedError
+
+    def get_filename(self) -> Optional[str]:
+        return self.filename
+
+    def load_from_storage(self):
+        pass
+
     def load_from_openai(self, openai_file: OpenAIFile):
         self.id = openai_file.id
         self.object = openai_file.object
+        self.bytes = openai_file.bytes
         self.created_at = openai_file.created_at
+        self.filename = openai_file.filename
+        self.status = openai_file.status
+        self.status_details = openai_file.status_details
 
-    def upload(self) -> OpenAIFile:
+    def create(self) -> "File":
         self.openai_file = self.client.files.create(file=self.read(), purpose=self.purpose)
         self.load_from_openai(self.openai_file)
+        logger.debug(f"File created: {self.openai_file.id}")
+        logger.debug(f"File: {self.openai_file}")
+        return self
+
+    def get_id(self) -> Optional[str]:
+        _id = self.id or self.openai_file.id if self.openai_file else None
+        if _id is None:
+            self.load_from_storage()
+            _id = self.id
+        return _id
+
+    def get_using_filename(self) -> Optional[OpenAIFile]:
+        file_list = self.client.files.list(purpose=self.purpose)
+        file_name = self.get_filename()
+        if file_name is None:
+            return None
+
+        logger.debug(f"Getting id for: {file_name}")
+        for file in file_list:
+            if file.filename == file_name:
+                logger.debug(f"Found: {file.id}")
+                return file
+        return None
+
+    def get_from_openai(self) -> OpenAIFile:
+        _file_id = self.get_id()
+        if _file_id is None:
+            oai_file = self.get_using_filename()
+        else:
+            oai_file = self.client.files.retrieve(file_id=_file_id)
+
+        if oai_file is None:
+            raise FileIdNotSet("File.id not set")
+
+        self.openai_file = oai_file
+        self.load_from_openai(self.openai_file)
         return self.openai_file
 
-    def download(self, use_cache: bool = True) -> str:
+    def get(self, use_cache: bool = True) -> "File":
+        if self.openai_file is not None and use_cache:
+            return self
+
+        self.get_from_openai()
+        return self
+
+    def get_or_create(self, use_cache: bool = True) -> "File":
         try:
-            file_to_download = self.get(use_cache=use_cache)
+            return self.get(use_cache=use_cache)
+        except FileIdNotSet:
+            return self.create()
+
+    def download(self):
+        try:
+            file_to_download = self.get_from_openai()
             if file_to_download is not None:
                 content = self.client.files.retrieve_content(file_id=file_to_download.id)
-                return content
+                self.write(content)
         except FileIdNotSet:
             logger.warning("File not available")
             raise
 
-    def get(self, use_cache: bool = True) -> OpenAIFile:
-        if self.openai_file is not None and use_cache:
-            return self.openai_file
-
-        _file_id = self.id or self.openai_file.id if self.openai_file else None
-        if _file_id is not None:
-            self.openai_file = self.client.files.retrieve(file_id=_file_id)
-            self.load_from_openai(self.openai_file)
-            return self.openai_file
-        raise FileIdNotSet("File.id not set")
-
-    def get_id(self) -> str:
-        return self.get().id
-
     def delete(self) -> OpenAIFileDeleted:
         try:
-            file_to_delete = self.get()
+            file_to_delete = self.get_from_openai()
             if file_to_delete is not None:
                 deletion_status = self.client.files.delete(
                     file_id=file_to_delete.id,
                 )
+                logger.debug(f"File deleted: {file_to_delete.id}")
                 return deletion_status
         except FileIdNotSet:
             logger.warning("File not available")
             raise
+
+    def to_dict(self) -> Dict[str, Any]:
+        return self.model_dump(
+            exclude_none=True,
+            include={
+                "filename",
+                "id",
+                "object",
+                "bytes",
+                "purpose",
+                "created_at",
+            },
+        )
+
+    def pprint(self):
"""Pretty print using rich""" + from rich.pretty import pprint + + pprint(self.to_dict()) + + def __str__(self) -> str: + import json + + return json.dumps(self.to_dict(), indent=4) diff --git a/phi/assistant/file/local.py b/phi/assistant/file/local.py new file mode 100644 index 000000000..16de4ffeb --- /dev/null +++ b/phi/assistant/file/local.py @@ -0,0 +1,22 @@ +from pathlib import Path +from typing import Any, Union, Optional + +from phi.assistant.file import File +from phi.utils.log import logger + + +class LocalFile(File): + path: Union[str, Path] + + @property + def filepath(self) -> Path: + if isinstance(self.path, str): + return Path(self.path) + return self.path + + def read(self) -> Any: + logger.debug(f"Reading file: {self.filepath}") + return self.filepath.open("rb") + + def get_filename(self) -> Optional[str]: + return self.filepath.name or self.filename diff --git a/phi/assistant/message.py b/phi/assistant/message.py index 9ac32d2f7..7af6715a8 100644 --- a/phi/assistant/message.py +++ b/phi/assistant/message.py @@ -86,7 +86,9 @@ def create(self, thread_id: Optional[str] = None) -> "Message": _file_ids = self.file_ids or [] if self.files: for _file in self.files: - _file_ids.append(_file.get_id()) + _file = _file.get_or_create() + if _file.id is not None: + _file_ids.append(_file.id) request_body["file_ids"] = _file_ids if self.metadata is not None: request_body["metadata"] = self.metadata @@ -159,6 +161,19 @@ def update(self, thread_id: Optional[str] = None) -> "Message": logger.warning("Message not available") raise + def get_content_text(self) -> str: + if isinstance(self.content, str): + return self.content + + content_str = "" + content_list = self.content or (self.openai_message.content if self.openai_message else None) + if content_list is not None: + for content in content_list: + if content.type == "text": + text = content.text + content_str += text.value + return content_str + def to_dict(self) -> Dict[str, Any]: return self.model_dump( exclude_none=True, diff --git a/phi/assistant/thread.py b/phi/assistant/thread.py index 41216180e..1172f8967 100644 --- a/phi/assistant/thread.py +++ b/phi/assistant/thread.py @@ -208,12 +208,27 @@ def pprint(self): pprint(self.to_dict()) - def print_response(self, message: str, assistant: Assistant) -> None: - from phi.cli.console import console + def print_messages(self) -> None: from rich.table import Table from rich.box import ROUNDED from rich.markdown import Markdown + from phi.cli.console import console + + # Get the messages from the thread + messages = self.get_messages() + # Print the response + table = Table(box=ROUNDED, border_style="blue") + for m in messages[::-1]: + if m.role == "user": + table.add_column("User") + table.add_column(m.get_content_text()) + if m.role == "assistant": + table.add_row("Assistant", Markdown(m.get_content_text())) + table.add_section() + console.print(table) + + def print_response(self, message: str, assistant: Assistant) -> None: # Start the response timer response_timer = Timer() response_timer.start() @@ -227,27 +242,7 @@ def print_response(self, message: str, assistant: Assistant) -> None: # Stop the response timer response_timer.stop() - # Get the messages from the thread - messages = self.get_messages() - - # Get the assistant response - assistant_response: str = "" - for m in messages: - oai_message = m.openai_message - if oai_message and oai_message.role == "assistant": - for content in oai_message.content: - if content.type == "text": - text = content.text - assistant_response += 
-                break
-
-        # Convert to markdown
-        md_response = Markdown(assistant_response)
-        table = Table(box=ROUNDED, border_style="blue")
-        table.add_column("Message")
-        table.add_column(message)
-        table.add_row(f"Response\n({response_timer.elapsed:.1f}s)", md_response)
-        console.print(table)
+        self.print_messages()
 
     def __str__(self) -> str:
         import json
diff --git a/phi/assistant/tool/arxiv.py b/phi/assistant/tool/arxiv.py
new file mode 100644
index 000000000..289b35fc8
--- /dev/null
+++ b/phi/assistant/tool/arxiv.py
@@ -0,0 +1,53 @@
+import json
+from typing import List, Optional
+
+from phi.document import Document
+from phi.knowledge.arxiv import ArxivKnowledgeBase
+from phi.assistant.tool.registry import ToolRegistry
+from phi.utils.log import logger
+
+
+class ArxivTools(ToolRegistry):
+    def __init__(self, knowledge_base: Optional[ArxivKnowledgeBase] = None):
+        super().__init__(name="arxiv_tools")
+        self.knowledge_base: Optional[ArxivKnowledgeBase] = knowledge_base
+
+        if self.knowledge_base is not None and isinstance(self.knowledge_base, ArxivKnowledgeBase):
+            self.register(self.search_arxiv_and_update_knowledge_base)
+        else:
+            self.register(self.search_arxiv)
+
+    def search_arxiv_and_update_knowledge_base(self, topic: str) -> str:
+        """This function searches arXiv for a topic, adds the results to the knowledge base and returns them.
+
+        USE THIS FUNCTION TO GET INFORMATION WHICH DOES NOT EXIST.
+
+        :param topic: The topic to search arXiv and add to knowledge base.
+        :return: Relevant documents from arXiv knowledge base.
+        """
+        if self.knowledge_base is None:
+            return "Knowledge base not provided"
+
+        logger.debug(f"Adding to knowledge base: {topic}")
+        self.knowledge_base.queries.append(topic)
+        logger.debug("Loading knowledge base.")
+        self.knowledge_base.load(recreate=False)
+        logger.debug(f"Searching knowledge base: {topic}")
+        relevant_docs: List[Document] = self.knowledge_base.search(query=topic)
+        return json.dumps([doc.to_dict() for doc in relevant_docs])
+
+    def search_arxiv(self, query: str, max_results: int = 5) -> str:
+        """
+        Searches arXiv for a query.
+
+        :param query: The query to search for.
+        :param max_results: The maximum number of results to return.
+        :return: Relevant documents from arXiv.
+        """
+        from phi.document.reader.arxiv import ArxivReader
+
+        arxiv = ArxivReader(max_results=max_results)
+
+        logger.debug(f"Searching arxiv for: {query}")
+        relevant_docs: List[Document] = arxiv.read(query=query)
+        return json.dumps([doc.to_dict() for doc in relevant_docs])
diff --git a/phi/assistant/tool/duckdb.py b/phi/assistant/tool/duckdb.py
new file mode 100644
index 000000000..04149c0fc
--- /dev/null
+++ b/phi/assistant/tool/duckdb.py
@@ -0,0 +1,339 @@
+from typing import Optional, Tuple
+
+from phi.assistant.tool.registry import ToolRegistry
+from phi.utils.log import logger
+
+try:
+    import duckdb
+except ImportError:
+    raise ImportError("`duckdb` not installed. Please install it using `pip install duckdb`.")
+
+
+class DuckDbTools(ToolRegistry):
+    def __init__(
+        self,
+        db_path: str = ":memory:",
+        s3_region: str = "us-east-1",
+        duckdb_connection: Optional[duckdb.DuckDBPyConnection] = None,
+    ):
+        super().__init__(name="duckdb_tools")
+
+        self.db_path: str = db_path
+        self.s3_region: str = s3_region
+        self._duckdb_connection: Optional[duckdb.DuckDBPyConnection] = duckdb_connection
+
+        self.register(self.run_duckdb_query)
+        self.register(self.show_tables)
+        self.register(self.describe_table)
+        self.register(self.inspect_query)
+        self.register(self.describe_table_or_view)
+        self.register(self.export_table_as)
+        self.register(self.summarize_table)
+        self.register(self.create_fts_index)
+        self.register(self.full_text_search)
+
+    @property
+    def duckdb_connection(self) -> duckdb.DuckDBPyConnection:
+        """
+        Returns the duckdb connection
+
+        :return duckdb.DuckDBPyConnection: duckdb connection
+        """
+        if self._duckdb_connection is None:
+            self._duckdb_connection = duckdb.connect(self.db_path)
+            try:
+                self._duckdb_connection.sql("INSTALL httpfs;")
+                self._duckdb_connection.sql("LOAD httpfs;")
+                self._duckdb_connection.sql(f"SET s3_region='{self.s3_region}';")
+            except Exception as e:
+                logger.exception(e)
+                logger.warning("Failed to install httpfs extension. Only local files will be supported")
+
+        return self._duckdb_connection
+
+    def run_duckdb_query(self, query: str) -> str:
+        """Function to run SQL queries against a duckdb database
+
+        :param query: SQL query to run
+        :return: Result of the query
+        """
+
+        # -*- Format the SQL Query
+        # Remove backticks
+        formatted_sql = query.replace("`", "")
+        # If there are multiple statements, only run the first one
+        formatted_sql = formatted_sql.split(";")[0]
+
+        try:
+            logger.debug(f"Running query: {formatted_sql}")
+
+            query_result = self.duckdb_connection.sql(formatted_sql)
+            result_output = "No output"
+            if query_result is not None:
+                try:
+                    results_as_python_objects = query_result.fetchall()
+                    result_rows = []
+                    for row in results_as_python_objects:
+                        if len(row) == 1:
+                            result_rows.append(str(row[0]))
+                        else:
+                            result_rows.append(",".join(str(x) for x in row))
+
+                    result_data = "\n".join(result_rows)
+                    result_output = ",".join(query_result.columns) + "\n" + result_data
+                except AttributeError:
+                    result_output = str(query_result)
+
+            logger.debug(f"Query result: {result_output}")
+            return result_output
+        except duckdb.ProgrammingError as e:
+            return str(e)
+        except duckdb.Error as e:
+            return str(e)
+        except Exception as e:
+            return str(e)
+
+    def show_tables(self) -> str:
+        """Function to show tables in the database
+
+        :return: List of tables in the database
+        """
+        stmt = "SHOW TABLES;"
+        tables = self.run_duckdb_query(stmt)
+        logger.debug(f"Tables: {tables}")
+        return tables
+
+    def describe_table(self, table: str) -> str:
+        """Function to describe a table
+
+        :param table: Table to describe
+        :return: Description of the table
+        """
+        stmt = f"DESCRIBE {table};"
+        table_description = self.run_duckdb_query(stmt)
+
+        logger.debug(f"Table description: {table_description}")
+        return f"{table}\n{table_description}"
+
+    def summarize_table(self, table: str) -> str:
+        """Function to summarize the contents of a table
+
+        :param table: Table to describe
+        :return: Description of the table
+        """
+        stmt = f"SUMMARIZE SELECT * FROM {table};"
+        table_description = self.run_duckdb_query(stmt)
+
+        logger.debug(f"Table description: {table_description}")
+        return f"{table}\n{table_description}"
+
+    def inspect_query(self, query: str) -> str:
+        """Function to inspect a query and return the query plan. Always inspect your query before running it.
+
+        :param query: Query to inspect
+        :return: Query plan
+        """
+        stmt = f"explain {query};"
+        explain_plan = self.run_duckdb_query(stmt)
+
+        logger.debug(f"Explain plan: {explain_plan}")
+        return explain_plan
+
+    def describe_table_or_view(self, table: str):
+        """Function to describe a table or view
+
+        :param table: Table or view to describe
+        :return: Description of the table or view
+        """
+        stmt = f"select column_name, data_type from information_schema.columns where table_name='{table}';"
+        table_description = self.run_duckdb_query(stmt)
+
+        logger.debug(f"Table description: {table_description}")
+        return f"{table}\n{table_description}"
+
+    def load_local_path_to_table(self, path: str, table_name: Optional[str] = None) -> Tuple[str, str]:
+        """Load a local file into duckdb
+
+        :param path: Path to load
+        :param table_name: Optional table name to use
+        :return: Table name, SQL statement used to load the file
+        """
+        import os
+
+        logger.debug(f"Loading {path} into duckdb")
+
+        if table_name is None:
+            # Get the file name from the s3 path
+            file_name = path.split("/")[-1]
+            # Get the file name without extension from the s3 path
+            table_name, extension = os.path.splitext(file_name)
+            # If the table_name isn't a valid SQL identifier, we'll need to use something else
+            table_name = table_name.replace("-", "_").replace(".", "_").replace(" ", "_").replace("/", "_")
+
+        create_statement = f"CREATE OR REPLACE TABLE '{table_name}' AS SELECT * FROM '{path}';"
+        self.run_duckdb_query(create_statement)
+
+        logger.debug(f"Loaded {path} into duckdb as {table_name}")
+        # self.run_duckdb_query(f"SELECT * from {table_name};")
+        return table_name, create_statement
+
+    def load_local_csv_to_table(
+        self, path: str, table_name: Optional[str] = None, delimiter: Optional[str] = None
+    ) -> Tuple[str, str]:
+        """Load a local CSV file into duckdb
+
+        :param path: Path to load
+        :param table_name: Optional table name to use
+        :param delimiter: Optional delimiter to use
+        :return: Table name, SQL statement used to load the file
+        """
+        import os
+
+        logger.debug(f"Loading {path} into duckdb")
+
+        if table_name is None:
+            # Get the file name from the s3 path
+            file_name = path.split("/")[-1]
+            # Get the file name without extension from the s3 path
+            table_name, extension = os.path.splitext(file_name)
+            # If the table_name isn't a valid SQL identifier, we'll need to use something else
+            table_name = table_name.replace("-", "_").replace(".", "_").replace(" ", "_").replace("/", "_")
+
+        select_statement = f"SELECT * FROM read_csv('{path}'"
+        if delimiter is not None:
+            select_statement += f", delim='{delimiter}')"
+        else:
+            select_statement += ")"
+
+        create_statement = f"CREATE OR REPLACE TABLE '{table_name}' AS {select_statement};"
+        self.run_duckdb_query(create_statement)
+
+        logger.debug(f"Loaded CSV {path} into duckdb as {table_name}")
+        # self.run_duckdb_query(f"SELECT * from {table_name};")
+        return table_name, create_statement
+
+    def load_s3_path_to_table(self, s3_path: str, table_name: Optional[str] = None) -> Tuple[str, str]:
+        """Load a file from S3 into duckdb
+
+        :param s3_path: S3 path to load
+        :param table_name: Optional table name to use
+        :return: Table name, SQL statement used to load the file
+        """
+        import os
+
+        logger.debug(f"Loading {s3_path} into duckdb")
+
+        if table_name is None:
+            # Get the file name from the s3 path
+            file_name = s3_path.split("/")[-1]
+            # Get the file name without extension from the s3 path
+            table_name, extension = os.path.splitext(file_name)
+            # If the table_name isn't a valid SQL identifier, we'll need to use something else
+            table_name = table_name.replace("-", "_").replace(".", "_").replace(" ", "_").replace("/", "_")
+
+        create_statement = f"CREATE OR REPLACE TABLE '{table_name}' AS SELECT * FROM '{s3_path}';"
+        self.run_duckdb_query(create_statement)
+
+        logger.debug(f"Loaded {s3_path} into duckdb as {table_name}")
+        # self.run_duckdb_query(f"SELECT * from {table_name};")
+        return table_name, create_statement
+
+    def load_s3_csv_to_table(
+        self, s3_path: str, table_name: Optional[str] = None, delimiter: Optional[str] = None
+    ) -> Tuple[str, str]:
+        """Load a CSV file from S3 into duckdb
+
+        :param s3_path: S3 path to load
+        :param table_name: Optional table name to use
+        :param delimiter: Optional delimiter to use
+        :return: Table name, SQL statement used to load the file
+        """
+        import os
+
+        logger.debug(f"Loading {s3_path} into duckdb")
+
+        if table_name is None:
+            # Get the file name from the s3 path
+            file_name = s3_path.split("/")[-1]
+            # Get the file name without extension from the s3 path
+            table_name, extension = os.path.splitext(file_name)
+            # If the table_name isn't a valid SQL identifier, we'll need to use something else
+            table_name = table_name.replace("-", "_").replace(".", "_").replace(" ", "_").replace("/", "_")
+
+        select_statement = f"SELECT * FROM read_csv('{s3_path}'"
+        if delimiter is not None:
+            select_statement += f", delim='{delimiter}')"
+        else:
+            select_statement += ")"
+
+        create_statement = f"CREATE OR REPLACE TABLE '{table_name}' AS {select_statement};"
+        self.run_duckdb_query(create_statement)
+
+        logger.debug(f"Loaded CSV {s3_path} into duckdb as {table_name}")
+        # self.run_duckdb_query(f"SELECT * from {table_name};")
+        return table_name, create_statement
+
+    def export_table_as(self, table_name: str, format: Optional[str] = "PARQUET", path: Optional[str] = None) -> str:
+        """Save a table to a desired format.
+        The default format is parquet.
+        If a path is provided, the table will be exported to that path, for example an s3 path.
+
+        :param table_name: Table to export
+        :param format: Format to export to
+        :param path: Path to export to
+        :return: Result of the export statement
+        """
+        if format is None:
+            format = "PARQUET"
+
+        logger.debug(f"Exporting Table {table_name} as {format.upper()} in the path {path}")
+        # self.run_duckdb_query(f"SELECT * from {table_name};")
+        if path is None:
+            path = f"{table_name}.{format}"
+        else:
+            path = f"{path}/{table_name}.{format}"
+        export_statement = f"COPY (SELECT * FROM {table_name}) TO '{path}' (FORMAT {format.upper()});"
+        result = self.run_duckdb_query(export_statement)
+        logger.debug(f"Exported {table_name} to {path}")
+
+        return result
+
+    def create_fts_index(self, table_name: str, unique_key: str, input_values: list[str]) -> str:
+        """Create a full text search index on a table
+
+        :param table_name: Table to create the index on
+        :param unique_key: Unique key to use
+        :param input_values: Values to index
+        :return: Result of the create index statement
+        """
+        logger.debug(f"Creating FTS index on {table_name} for {input_values}")
+        self.run_duckdb_query("INSTALL fts;")
+        logger.debug("Installed FTS extension")
+        self.run_duckdb_query("LOAD fts;")
+        logger.debug("Loaded FTS extension")
+
+        create_fts_index_statement = f"PRAGMA create_fts_index('{table_name}', '{unique_key}', '{input_values}');"
+        logger.debug(f"Running {create_fts_index_statement}")
+        result = self.run_duckdb_query(create_fts_index_statement)
+        logger.debug(f"Created FTS index on {table_name} for {input_values}")
+
+        return result
+
+    def full_text_search(self, table_name: str, unique_key: str, search_text: str) -> str:
+        """Full text search in a table column for a specific text/keyword
+
+        :param table_name: Table to search
+        :param unique_key: Unique key to use
+        :param search_text: Text to search
+        :return: Search results
+        """
+        logger.debug(f"Running full_text_search for {search_text} in {table_name}")
+        search_text_statement = f"""SELECT fts_main_corpus.match_bm25({unique_key}, '{search_text}') AS score,*
+        FROM {table_name}
+        WHERE score IS NOT NULL
+        ORDER BY score;"""
+
+        logger.debug(f"Running {search_text_statement}")
+        result = self.run_duckdb_query(search_text_statement)
+        logger.debug(f"Search results for {search_text} in {table_name}")
+
+        return result
diff --git a/phi/assistant/tool/email.py b/phi/assistant/tool/email.py
new file mode 100644
index 000000000..284a8e220
--- /dev/null
+++ b/phi/assistant/tool/email.py
@@ -0,0 +1,59 @@
+from typing import Optional
+
+from phi.assistant.tool.registry import ToolRegistry
+from phi.utils.log import logger
+
+
+class EmailTools(ToolRegistry):
+    def __init__(
+        self,
+        receiver_email: Optional[str] = None,
+        sender_name: Optional[str] = None,
+        sender_email: Optional[str] = None,
+        sender_passkey: Optional[str] = None,
+    ):
+        super().__init__(name="email_tools")
+        self.receiver_email: Optional[str] = receiver_email
+        self.sender_name: Optional[str] = sender_name
+        self.sender_email: Optional[str] = sender_email
+        self.sender_passkey: Optional[str] = sender_passkey
+        self.register(self.email_user)
+
+    def email_user(self, subject: str, body: str) -> str:
+        """Emails the user with the given subject and body.
+
+        :param subject: The subject of the email.
+        :param body: The body of the email.
+        :return: "success" if the email was sent successfully, "error: [error message]" otherwise.
+        """
+        try:
+            import smtplib
+            from email.message import EmailMessage
+        except ImportError:
+            logger.error("`smtplib` not installed")
+            raise
+
+        if not self.receiver_email:
+            return "error: No receiver email provided"
+        if not self.sender_name:
+            return "error: No sender name provided"
+        if not self.sender_email:
+            return "error: No sender email provided"
+        if not self.sender_passkey:
+            return "error: No sender passkey provided"
+
+        msg = EmailMessage()
+        msg["Subject"] = subject
+        msg["From"] = f"{self.sender_name} <{self.sender_email}>"
+        msg["To"] = self.receiver_email
+        msg.set_content(body)
+
+        logger.info(f"Sending Email to {self.receiver_email}")
+        try:
+            with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
+                smtp.login(self.sender_email, self.sender_passkey)
+                smtp.send_message(msg)
+        except Exception as e:
+            logger.error(f"Error sending email: {e}")
+            return f"error: {e}"
+        return "email sent successfully"
diff --git a/phi/assistant/tool/phi.py b/phi/assistant/tool/phi.py
new file mode 100644
index 000000000..c691ffe7a
--- /dev/null
+++ b/phi/assistant/tool/phi.py
@@ -0,0 +1,116 @@
+import uuid
+from typing import Optional
+
+from phi.assistant.tool.registry import ToolRegistry
+from phi.utils.log import logger
+
+
+class PhiTools(ToolRegistry):
+    def __init__(self):
+        super().__init__(name="phi_tools")
+        self.register(self.create_new_app)
+        self.register(self.start_user_workspace)
+        self.register(self.validate_phi_is_ready)
+
+    def validate_phi_is_ready(self) -> bool:
+        """Validates that Phi is ready to run commands.
+
+        :return: True if Phi is ready, False otherwise.
+        """
+        # Check if docker is running
+        return True
+
+    def create_new_app(self, template: str, workspace_name: str) -> str:
+        """Creates a new phidata workspace for a given application template.
+        Use this function when the user wants to create a new "llm-app", "api-app", "django-app", or "streamlit-app".
+        Remember to provide a name for the new workspace.
+        You can use the format: "template-name" + name of an interesting person (lowercase, no spaces).
+
+        :param template: (required) The template to use for the new application.
+            One of: llm-app, api-app, django-app, streamlit-app
+        :param workspace_name: (required) The name of the workspace to create for the new application.
+        :return: Status of the function or next steps.
+        """
+        from phi.workspace.operator import create_workspace, TEMPLATE_TO_NAME_MAP, WorkspaceStarterTemplate
+
+        ws_template: Optional[WorkspaceStarterTemplate] = None
+        if template.lower() in WorkspaceStarterTemplate.__members__.values():
+            ws_template = WorkspaceStarterTemplate(template)
+
+        if ws_template is None:
+            return f"Error: Invalid template: {template}, must be one of: llm-app, api-app, django-app, streamlit-app"
+
+        ws_dir_name: Optional[str] = workspace_name
+        if ws_dir_name is None:
+            # Get default_ws_name from template
+            default_ws_name: Optional[str] = TEMPLATE_TO_NAME_MAP.get(ws_template)
+            # Add a 2 digit random suffix to the default_ws_name
+            random_suffix = str(uuid.uuid4())[:2]
+            default_ws_name = f"{default_ws_name}-{random_suffix}"
+
+            return (
+                f"Ask the user for a name for the app directory with the default value: {default_ws_name}."
+                f"Ask the user to input YES or NO to use the default value."
+            )
+            # # Ask user for workspace name if not provided
+            # ws_dir_name = Prompt.ask("Please provide a name for the app", default=default_ws_name, console=console)
+
+        logger.info(f"Creating: {template} at {ws_dir_name}")
+        try:
+            create_successful = create_workspace(name=ws_dir_name, template=ws_template.value)
+            if create_successful:
+                return (
+                    f"Successfully created a {ws_template.value} at {ws_dir_name}. "
+                    f"Ask the user if they want to start the app now."
+                )
+            else:
+                return f"Error: Failed to create {template}"
+        except Exception as e:
+            return f"Error: {e}"
+
+    def start_user_workspace(self, workspace_name: Optional[str] = None) -> str:
+        """Starts the workspace for a user. Use this function when the user wants to start a given workspace.
+        If the workspace name is not provided, the function will start the active workspace.
+        Otherwise, it will start the workspace with the given name.
+
+        :param workspace_name: The name of the workspace to start
+        :return: Status of the function or next steps.
+        """
+        from phi.cli.config import PhiCliConfig
+        from phi.infra.type import InfraType
+        from phi.workspace.config import WorkspaceConfig
+        from phi.workspace.operator import start_workspace
+
+        phi_config: Optional[PhiCliConfig] = PhiCliConfig.from_saved_config()
+        if not phi_config:
+            return "Error: Phi not initialized. Please run `phi ai` again"
+
+        workspace_config_to_start: Optional[WorkspaceConfig] = None
+        active_ws_config: Optional[WorkspaceConfig] = phi_config.get_active_ws_config()
+
+        if workspace_name is None:
+            if active_ws_config is None:
+                return "Error: No active workspace found. Please create a workspace first."
+            workspace_config_to_start = active_ws_config
+        else:
+            workspace_config_by_name: Optional[WorkspaceConfig] = phi_config.get_ws_config_by_dir_name(workspace_name)
+            if workspace_config_by_name is None:
+                return f"Error: Could not find a workspace with name: {workspace_name}"
+            workspace_config_to_start = workspace_config_by_name
+
+            # Set the active workspace to the workspace to start
+            if active_ws_config is not None and active_ws_config.ws_root_path != workspace_config_by_name.ws_root_path:
+                phi_config.set_active_ws_dir(workspace_config_by_name.ws_root_path)
+                active_ws_config = workspace_config_by_name
+
+        try:
+            start_workspace(
+                phi_config=phi_config,
+                ws_config=workspace_config_to_start,
+                target_env="dev",
+                target_infra=InfraType.docker,
+                auto_confirm=True,
+            )
+            return f"Successfully started workspace: {workspace_config_to_start.ws_root_path.stem}"
+        except Exception as e:
+            return f"Error: {e}"
diff --git a/phi/assistant/tool/website.py b/phi/assistant/tool/website.py
new file mode 100644
index 000000000..cbea35a5d
--- /dev/null
+++ b/phi/assistant/tool/website.py
@@ -0,0 +1,50 @@
+import json
+from typing import List, Optional
+
+from phi.document import Document
+from phi.knowledge.website import WebsiteKnowledgeBase
+from phi.assistant.tool.registry import ToolRegistry
+from phi.utils.log import logger
+
+
+class WebsiteTools(ToolRegistry):
+    def __init__(self, knowledge_base: Optional[WebsiteKnowledgeBase] = None):
+        super().__init__(name="website_tools")
+        self.knowledge_base: Optional[WebsiteKnowledgeBase] = knowledge_base
+
+        if self.knowledge_base is not None and isinstance(self.knowledge_base, WebsiteKnowledgeBase):
+            self.register(self.add_website_to_knowledge_base)
+        else:
+            self.register(self.read_website)
+
+    def add_website_to_knowledge_base(self, url: str) -> str:
+        """This function adds a website's content to the knowledge base.
+        NOTE: The website must start with https:// and should be a valid website.
+
+        USE THIS FUNCTION TO GET INFORMATION ABOUT PRODUCTS FROM THE INTERNET.
+
+        :param url: The url of the website to add.
+        :return: 'Success' if the website was added to the knowledge base.
+        """
+        if self.knowledge_base is None:
+            return "Knowledge base not provided"
+
+        logger.debug(f"Adding to knowledge base: {url}")
+        self.knowledge_base.urls.append(url)
+        logger.debug("Loading knowledge base.")
+        self.knowledge_base.load(recreate=False)
+        return "Success"
+
+    def read_website(self, url: str) -> str:
+        """This function reads a website and returns the content.
+
+        :param url: The url of the website to read.
+        :return: Relevant documents from the website.
+ """ + from phi.document.reader.website import WebsiteReader + + website = WebsiteReader() + + logger.debug(f"Reading website: {url}") + relevant_docs: List[Document] = website.read(url=url) + return json.dumps([doc.to_dict() for doc in relevant_docs]) diff --git a/phi/assistant/tool/wikipedia.py b/phi/assistant/tool/wikipedia.py new file mode 100644 index 000000000..cedd12d30 --- /dev/null +++ b/phi/assistant/tool/wikipedia.py @@ -0,0 +1,54 @@ +import json +from typing import List, Optional + +from phi.document import Document +from phi.knowledge.wikipedia import WikipediaKnowledgeBase +from phi.assistant.tool.registry import ToolRegistry +from phi.utils.log import logger + + +class WikipediaAgent(ToolRegistry): + def __init__(self, knowledge_base: Optional[WikipediaKnowledgeBase] = None): + super().__init__(name="wikipedia_tools") + self.knowledge_base: Optional[WikipediaKnowledgeBase] = knowledge_base + + if self.knowledge_base is not None and isinstance(self.knowledge_base, WikipediaKnowledgeBase): + self.register(self.search_wikipedia_and_update_knowledge_base) + else: + self.register(self.search_wikipedia) + + def search_wikipedia_and_update_knowledge_base(self, topic: str) -> str: + """This function searches wikipedia for a topic, adds the results to the knowledge base and returns them. + + USE THIS FUNCTION TO GET INFORMATION WHICH DOES NOT EXIST. + + :param topic: The topic to search Wikipedia and add to knowledge base. + :return: Relevant documents from Wikipedia knowledge base. + """ + + if self.knowledge_base is None: + return "Knowledge base not provided" + + logger.debug(f"Adding to knowledge base: {topic}") + self.knowledge_base.topics.append(topic) + logger.debug("Loading knowledge base.") + self.knowledge_base.load(recreate=False) + logger.debug(f"Searching knowledge base: {topic}") + relevant_docs: List[Document] = self.knowledge_base.search(query=topic) + return json.dumps([doc.to_dict() for doc in relevant_docs]) + + def search_wikipedia(self, query: str) -> str: + """Searches Wikipedia for a query. + + :param query: The query to search for. + :return: Relevant documents from wikipedia. + """ + try: + import wikipedia # noqa: F401 + except ImportError: + raise ImportError( + "The `wikipedia` package is not installed. " "Please install it via `pip install wikipedia`." + ) + + logger.info(f"Searching wikipedia for: {query}") + return json.dumps(Document(name=query, content=wikipedia.summary(query)).to_dict()) diff --git a/pyproject.toml b/pyproject.toml index c2f34a3de..8a7cc25cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "phidata" -version = "2.0.33" +version = "2.0.34" description = "AI Toolkit for Engineers" requires-python = ">=3.7" readme = "README.md"