From 8120b8f909153bc6b03495e7c9cab7ae62310157 Mon Sep 17 00:00:00 2001 From: Sunish Sheth Date: Tue, 11 Feb 2025 17:06:18 -0800 Subject: [PATCH] Deprecating sql_database access for creating UC functions for agent tools --- cookbook/README.md | 1 - cookbook/databricks_sql_db.ipynb | 273 ------------------ .../docs/integrations/providers/databricks.md | 9 +- .../utilities/sql_database.py | 8 + 4 files changed, 9 insertions(+), 282 deletions(-) delete mode 100644 cookbook/databricks_sql_db.ipynb diff --git a/cookbook/README.md b/cookbook/README.md index 2b90e07c7919a..6b18e508af71a 100644 --- a/cookbook/README.md +++ b/cookbook/README.md @@ -21,7 +21,6 @@ Notebook | Description [code-analysis-deeplake.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/code-analysis-deeplake.ipynb) | Analyze its own code base with the help of gpt and activeloop's deep lake. [custom_agent_with_plugin_retri...](https://github.com/langchain-ai/langchain/tree/master/cookbook/custom_agent_with_plugin_retrieval.ipynb) | Build a custom agent that can interact with ai plugins by retrieving tools and creating natural language wrappers around openapi endpoints. [custom_agent_with_plugin_retri...](https://github.com/langchain-ai/langchain/tree/master/cookbook/custom_agent_with_plugin_retrieval_using_plugnplai.ipynb) | Build a custom agent with plugin retrieval functionality, utilizing ai plugins from the `plugnplai` directory. -[databricks_sql_db.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/databricks_sql_db.ipynb) | Connect to databricks runtimes and databricks sql. [deeplake_semantic_search_over_...](https://github.com/langchain-ai/langchain/tree/master/cookbook/deeplake_semantic_search_over_chat.ipynb) | Perform semantic search and question-answering over a group chat using activeloop's deep lake with gpt4. [elasticsearch_db_qa.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/elasticsearch_db_qa.ipynb) | Interact with elasticsearch analytics databases in natural language and build search queries via the elasticsearch dsl API. [extraction_openai_tools.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/extraction_openai_tools.ipynb) | Structured Data Extraction with OpenAI Tools diff --git a/cookbook/databricks_sql_db.ipynb b/cookbook/databricks_sql_db.ipynb deleted file mode 100644 index 78fba6b914ee9..0000000000000 --- a/cookbook/databricks_sql_db.ipynb +++ /dev/null @@ -1,273 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "707d13a7", - "metadata": {}, - "source": [ - "# Databricks\n", - "\n", - "This notebook covers how to connect to the [Databricks runtimes](https://docs.databricks.com/runtime/index.html) and [Databricks SQL](https://www.databricks.com/product/databricks-sql) using the SQLDatabase wrapper of LangChain.\n", - "It is broken into 3 parts: installation and setup, connecting to Databricks, and examples." 
- ] - }, - { - "cell_type": "markdown", - "id": "0076d072", - "metadata": {}, - "source": [ - "## Installation and Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "739b489b", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install databricks-sql-connector" - ] - }, - { - "cell_type": "markdown", - "id": "73113163", - "metadata": {}, - "source": [ - "## Connecting to Databricks\n", - "\n", - "You can connect to [Databricks runtimes](https://docs.databricks.com/runtime/index.html) and [Databricks SQL](https://www.databricks.com/product/databricks-sql) using the `SQLDatabase.from_databricks()` method.\n", - "\n", - "### Syntax\n", - "```python\n", - "SQLDatabase.from_databricks(\n", - " catalog: str,\n", - " schema: str,\n", - " host: Optional[str] = None,\n", - " api_token: Optional[str] = None,\n", - " warehouse_id: Optional[str] = None,\n", - " cluster_id: Optional[str] = None,\n", - " engine_args: Optional[dict] = None,\n", - " **kwargs: Any)\n", - "```\n", - "### Required Parameters\n", - "* `catalog`: The catalog name in the Databricks database.\n", - "* `schema`: The schema name in the catalog.\n", - "\n", - "### Optional Parameters\n", - "There following parameters are optional. When executing the method in a Databricks notebook, you don't need to provide them in most of the cases.\n", - "* `host`: The Databricks workspace hostname, excluding 'https://' part. Defaults to 'DATABRICKS_HOST' environment variable or current workspace if in a Databricks notebook.\n", - "* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. Defaults to 'DATABRICKS_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n", - "* `warehouse_id`: The warehouse ID in the Databricks SQL.\n", - "* `cluster_id`: The cluster ID in the Databricks Runtime. If running in a Databricks notebook and both 'warehouse_id' and 'cluster_id' are None, it uses the ID of the cluster the notebook is attached to.\n", - "* `engine_args`: The arguments to be used when connecting Databricks.\n", - "* `**kwargs`: Additional keyword arguments for the `SQLDatabase.from_uri` method." - ] - }, - { - "cell_type": "markdown", - "id": "b11c7e48", - "metadata": {}, - "source": [ - "## Examples" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8102bca0", - "metadata": {}, - "outputs": [], - "source": [ - "# Connecting to Databricks with SQLDatabase wrapper\n", - "from langchain_community.utilities import SQLDatabase\n", - "\n", - "db = SQLDatabase.from_databricks(catalog=\"samples\", schema=\"nyctaxi\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "9dd36f58", - "metadata": {}, - "outputs": [], - "source": [ - "# Creating a OpenAI Chat LLM wrapper\n", - "from langchain_openai import ChatOpenAI\n", - "\n", - "llm = ChatOpenAI(temperature=0, model_name=\"gpt-4\")" - ] - }, - { - "cell_type": "markdown", - "id": "5b5c5f1a", - "metadata": {}, - "source": [ - "### SQL Chain example\n", - "\n", - "This example demonstrates the use of the [SQL Chain](https://python.langchain.com/en/latest/modules/chains/examples/sqlite.html) for answering a question over a Databricks database." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "36f2270b", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.utilities import SQLDatabaseChain\n", - "\n", - "db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "4e2b5f25", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new SQLDatabaseChain chain...\u001b[0m\n", - "What is the average duration of taxi rides that start between midnight and 6am?\n", - "SQLQuery:\u001b[32;1m\u001b[1;3mSELECT AVG(UNIX_TIMESTAMP(tpep_dropoff_datetime) - UNIX_TIMESTAMP(tpep_pickup_datetime)) as avg_duration\n", - "FROM trips\n", - "WHERE HOUR(tpep_pickup_datetime) >= 0 AND HOUR(tpep_pickup_datetime) < 6\u001b[0m\n", - "SQLResult: \u001b[33;1m\u001b[1;3m[(987.8122786304605,)]\u001b[0m\n", - "Answer:\u001b[32;1m\u001b[1;3mThe average duration of taxi rides that start between midnight and 6am is 987.81 seconds.\u001b[0m\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'The average duration of taxi rides that start between midnight and 6am is 987.81 seconds.'" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "db_chain.run(\n", - " \"What is the average duration of taxi rides that start between midnight and 6am?\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "e496d5e5", - "metadata": {}, - "source": [ - "### SQL Database Agent example\n", - "\n", - "This example demonstrates the use of the [SQL Database Agent](/docs/integrations/tools/sql_database) for answering questions over a Databricks database." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9918e86a", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.agents import create_sql_agent\n", - "from langchain_community.agent_toolkits import SQLDatabaseToolkit\n", - "\n", - "toolkit = SQLDatabaseToolkit(db=db, llm=llm)\n", - "agent = create_sql_agent(llm=llm, toolkit=toolkit, verbose=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "c484a76e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3mAction: list_tables_sql_db\n", - "Action Input: \u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3mtrips\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mI should check the schema of the trips table to see if it has the necessary columns for trip distance and duration.\n", - "Action: schema_sql_db\n", - "Action Input: trips\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3m\n", - "CREATE TABLE trips (\n", - "\ttpep_pickup_datetime TIMESTAMP, \n", - "\ttpep_dropoff_datetime TIMESTAMP, \n", - "\ttrip_distance FLOAT, \n", - "\tfare_amount FLOAT, \n", - "\tpickup_zip INT, \n", - "\tdropoff_zip INT\n", - ") USING DELTA\n", - "\n", - "/*\n", - "3 rows from trips table:\n", - "tpep_pickup_datetime\ttpep_dropoff_datetime\ttrip_distance\tfare_amount\tpickup_zip\tdropoff_zip\n", - "2016-02-14 16:52:13+00:00\t2016-02-14 17:16:04+00:00\t4.94\t19.0\t10282\t10171\n", - "2016-02-04 18:44:19+00:00\t2016-02-04 18:46:00+00:00\t0.28\t3.5\t10110\t10110\n", - "2016-02-17 17:13:57+00:00\t2016-02-17 17:17:55+00:00\t0.7\t5.0\t10103\t10023\n", - "*/\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe trips table has 
the necessary columns for trip distance and duration. I will write a query to find the longest trip distance and its duration.\n", - "Action: query_checker_sql_db\n", - "Action Input: SELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mSELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mThe query is correct. I will now execute it to find the longest trip distance and its duration.\n", - "Action: query_sql_db\n", - "Action Input: SELECT trip_distance, tpep_dropoff_datetime - tpep_pickup_datetime as duration FROM trips ORDER BY trip_distance DESC LIMIT 1\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m[(30.6, '0 00:43:31.000000000')]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3mI now know the final answer.\n", - "Final Answer: The longest trip distance is 30.6 miles and it took 43 minutes and 31 seconds.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'The longest trip distance is 30.6 miles and it took 43 minutes and 31 seconds.'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"What is the longest trip distance and how long did it take?\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/integrations/providers/databricks.md b/docs/docs/integrations/providers/databricks.md index a45e8e3e03eb2..94de37da8b221 100644 --- a/docs/docs/integrations/providers/databricks.md +++ b/docs/docs/integrations/providers/databricks.md @@ -103,14 +103,7 @@ See [MLflow LangChain Integration](/docs/integrations/providers/mlflow_tracking) SQLDatabase ----------- -You can connect to Databricks SQL using the SQLDatabase wrapper of LangChain. -``` -from langchain.sql_database import SQLDatabase - -db = SQLDatabase.from_databricks(catalog="samples", schema="nyctaxi") -``` - -See [Databricks SQL Agent](https://docs.databricks.com/en/large-language-models/langchain.html#databricks-sql-agent) for how to connect Databricks SQL with your LangChain Agent as a powerful querying tool. +To connect to Databricks SQL or query structured data, see the [Databricks structured retriever tool documentation](https://docs.databricks.com/en/generative-ai/agent-framework/structured-retrieval-tools.html#table-query-tool). 
Open Models
-----------

diff --git a/libs/community/langchain_community/utilities/sql_database.py b/libs/community/langchain_community/utilities/sql_database.py
index d6ecc4ca008b3..20477bb18bb44 100644
--- a/libs/community/langchain_community/utilities/sql_database.py
+++ b/libs/community/langchain_community/utilities/sql_database.py
@@ -139,6 +139,14 @@ def from_uri(
         return cls(create_engine(database_uri, **_engine_args), **kwargs)
 
     @classmethod
+    @deprecated(
+        "0.3.18",
+        message="For structured retrieval over Databricks SQL, "
+        "see the latest best practices and recommended APIs at "
+        "https://docs.databricks.com/en/generative-ai/agent-framework/"
+        "structured-retrieval-tools.html#table-query-tool",
+        removal="1.0",
+    )
     def from_databricks(
         cls,
         catalog: str,
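
Illustration (not part of the patch): the sketch below shows how the `deprecated` decorator applied above surfaces to callers, assuming `langchain_core` is installed and that its `langchain_core._api.deprecated` helper is the decorator referenced in the diff. The `legacy_from_databricks` function and its message text are hypothetical stand-ins so the warning can be demonstrated without a Databricks workspace; `SQLDatabase.from_databricks` itself keeps working until the stated 1.0 removal.

```python
# Hedged sketch: a hypothetical stand-in decorated the same way the patch
# decorates SQLDatabase.from_databricks. Assumes langchain_core is installed.
import warnings

from langchain_core._api import deprecated


@deprecated(
    "0.3.18",
    message="Hypothetical message: use the Databricks structured "
    "retrieval tools for table queries instead.",
    removal="1.0",
)
def legacy_from_databricks(catalog: str, schema: str) -> str:
    """Stand-in for the real connection helper; returns a fake identifier."""
    return f"{catalog}.{schema}"


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy_from_databricks("samples", "nyctaxi")

# The decorator emits a LangChainDeprecationWarning carrying the message above.
print(caught[0].category.__name__)
print(str(caught[0].message))
```

This mirrors what callers of `SQLDatabase.from_databricks` should start seeing after this change: a warning that points at the replacement documentation while the method continues to function until removal.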