From 7321841337f3c1371a2b11f15f5af233ede1d797 Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Fri, 15 Dec 2023 16:28:34 -0500
Subject: [PATCH 1/4] vanna vector storage
---
src/vanna/remote.py | 2 +-
src/vanna/vannadb/__init__.py | 0
src/vanna/vannadb/vannadb_vector.py | 183 ++++++++++++++++++++++++++++
3 files changed, 184 insertions(+), 1 deletion(-)
create mode 100644 src/vanna/vannadb/__init__.py
create mode 100644 src/vanna/vannadb/vannadb_vector.py
diff --git a/src/vanna/remote.py b/src/vanna/remote.py
index c5f843ae..d7ff8b02 100644
--- a/src/vanna/remote.py
+++ b/src/vanna/remote.py
@@ -197,7 +197,7 @@ def add_ddl(self, ddl: str, **kwargs) -> str:
return status.id
- def add_documentation(self, documentation: str, **kwargs) -> str:
+ def add_documentation(self, doc: str, **kwargs) -> str:
"""
Adds documentation to the model's training data
diff --git a/src/vanna/vannadb/__init__.py b/src/vanna/vannadb/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/vanna/vannadb/vannadb_vector.py b/src/vanna/vannadb/vannadb_vector.py
new file mode 100644
index 00000000..ac9955b0
--- /dev/null
+++ b/src/vanna/vannadb/vannadb_vector.py
@@ -0,0 +1,183 @@
+from ..base import VannaBase
+from ..types import (
+ QuestionSQLPair,
+ StatusWithId,
+ StringData,
+ DataFrameJSON,
+ Status,
+ TrainingData,
+ Question,
+)
+from io import StringIO
+import pandas as pd
+import requests
+import json
+
+class VannaDB_VectorStore(VannaBase):
+ def __init__(self, vanna_model: str, vanna_api_key: str, config=None):
+ VannaBase.__init__(self, config=config)
+
+ self._model = vanna_model
+ self._api_key = vanna_api_key
+
+ self._endpoint = (
+ "https://ask.vanna.ai/rpc"
+ if config is None or "endpoint" not in config
+ else config["endpoint"]
+ )
+ self._unauthenticated_endpoint = (
+ "https://ask.vanna.ai/unauthenticated_rpc"
+ if config is None or "unauthenticated_endpoint" not in config
+ else config["unauthenticated_endpoint"]
+ )
+ self.related_training_data = {}
+
+ def _unauthenticated_rpc_call(self, method, params):
+ headers = {
+ "Content-Type": "application/json",
+ }
+ data = {
+ "method": method,
+ "params": [self._dataclass_to_dict(obj) for obj in params],
+ }
+
+ response = requests.post(
+ self._unauthenticated_endpoint, headers=headers, data=json.dumps(data)
+ )
+ return response.json()
+
+ def _rpc_call(self, method, params):
+ if method != "list_orgs":
+ headers = {
+ "Content-Type": "application/json",
+ "Vanna-Key": self._api_key,
+ "Vanna-Org": self._model,
+ }
+ else:
+ headers = {
+ "Content-Type": "application/json",
+ "Vanna-Key": self._api_key,
+ "Vanna-Org": "demo-tpc-h",
+ }
+
+ data = {
+ "method": method,
+ "params": [self._dataclass_to_dict(obj) for obj in params],
+ }
+
+ response = requests.post(self._endpoint, headers=headers, data=json.dumps(data))
+ return response.json()
+
+ def generate_embedding(self, data: str, **kwargs) -> list[float]:
+ # This is done server-side
+ pass
+
+ def add_question_sql(self, question: str, sql: str, **kwargs) -> str:
+ if "tag" in kwargs:
+ tag = kwargs["tag"]
+ else:
+ tag = "Manually Trained"
+
+ params = [QuestionSQLPair(question=question, sql=sql, tag=tag)]
+
+ d = self._rpc_call(method="add_sql", params=params)
+
+ if "result" not in d:
+ raise Exception("Error adding question and SQL pair", d)
+
+ status = StatusWithId(**d["result"])
+
+ return status.id
+
+ def add_ddl(self, ddl: str, **kwargs) -> str:
+ params = [StringData(data=ddl)]
+
+ d = self._rpc_call(method="add_ddl", params=params)
+
+ if "result" not in d:
+ raise Exception("Error adding DDL", d)
+
+ status = StatusWithId(**d["result"])
+
+ return status.id
+
+ def add_documentation(self, doc: str, **kwargs) -> str:
+ params = [StringData(data=doc)]
+
+ d = self._rpc_call(method="add_documentation", params=params)
+
+ if "result" not in d:
+ raise Exception("Error adding documentation", d)
+
+ status = StatusWithId(**d["result"])
+
+ return status.id
+
+ def get_training_data(self, **kwargs) -> pd.DataFrame:
+ params = []
+
+ d = self._rpc_call(method="get_training_data", params=params)
+
+ if "result" not in d:
+ return None
+
+ # Load the result into a dataclass
+ training_data = DataFrameJSON(**d["result"])
+
+ df = pd.read_json(StringIO(training_data.data))
+
+ return df
+
+ def remove_training_data(self, id: str, **kwargs) -> bool:
+ params = [StringData(data=id)]
+
+ d = self._rpc_call(method="remove_training_data", params=params)
+
+ if "result" not in d:
+ raise Exception(f"Error removing training data")
+
+ status = Status(**d["result"])
+
+ if not status.success:
+ raise Exception(f"Error removing training data: {status.message}")
+
+ return status.success
+
+ def get_related_training_data_cached(self, question: str) -> TrainingData:
+ params = [Question(question=question)]
+
+ d = self.__rpc_call(method="get_related_training_data", params=params)
+
+ if "result" not in d:
+ return None
+
+ # Load the result into a dataclass
+ training_data = TrainingData(**d["result"])
+
+ self.related_training_data[question] = training_data
+
+ return training_data
+
+ def get_similar_question_sql(self, question: str, **kwargs) -> list:
+ if question in self.related_training_data:
+ training_data = self.related_training_data[question]
+ else:
+ training_data = self.get_related_training_data_cached(question)
+
+ return training_data.questions
+
+ def get_related_ddl(self, question: str, **kwargs) -> list:
+ if question in self.related_training_data:
+ training_data = self.related_training_data[question]
+ else:
+ training_data = self.get_related_training_data_cached(question)
+
+ return training_data.ddl
+
+ def get_related_documentation(self, question: str, **kwargs) -> list:
+ if question in self.related_training_data:
+ training_data = self.related_training_data[question]
+ else:
+ training_data = self.get_related_training_data_cached(question)
+
+ return training_data.documentation
\ No newline at end of file
From e764c38bb086a9a9b84aaf13a9321a0add9b1670 Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Fri, 15 Dec 2023 16:39:32 -0500
Subject: [PATCH 2/4] typo
---
src/vanna/vannadb/vannadb_vector.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/vanna/vannadb/vannadb_vector.py b/src/vanna/vannadb/vannadb_vector.py
index ac9955b0..ae31669f 100644
--- a/src/vanna/vannadb/vannadb_vector.py
+++ b/src/vanna/vannadb/vannadb_vector.py
@@ -146,7 +146,7 @@ def remove_training_data(self, id: str, **kwargs) -> bool:
def get_related_training_data_cached(self, question: str) -> TrainingData:
params = [Question(question=question)]
- d = self.__rpc_call(method="get_related_training_data", params=params)
+ d = self._rpc_call(method="get_related_training_data", params=params)
if "result" not in d:
return None
From faed4fb96601fb23a458012e9e2191d272656a9b Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Fri, 15 Dec 2023 16:42:39 -0500
Subject: [PATCH 3/4] missing method
---
src/vanna/vannadb/vannadb_vector.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/vanna/vannadb/vannadb_vector.py b/src/vanna/vannadb/vannadb_vector.py
index ae31669f..6378458b 100644
--- a/src/vanna/vannadb/vannadb_vector.py
+++ b/src/vanna/vannadb/vannadb_vector.py
@@ -12,6 +12,7 @@
import pandas as pd
import requests
import json
+import dataclasses
class VannaDB_VectorStore(VannaBase):
def __init__(self, vanna_model: str, vanna_api_key: str, config=None):
@@ -68,6 +69,9 @@ def _rpc_call(self, method, params):
response = requests.post(self._endpoint, headers=headers, data=json.dumps(data))
return response.json()
+ def _dataclass_to_dict(self, obj):
+ return dataclasses.asdict(obj)
+
def generate_embedding(self, data: str, **kwargs) -> list[float]:
# This is done server-side
pass
From 0e4c0a5665bfd0366afbecf0660989c122c12bbd Mon Sep 17 00:00:00 2001
From: Zain Hoda <7146154+zainhoda@users.noreply.github.com>
Date: Fri, 15 Dec 2023 18:56:00 -0500
Subject: [PATCH 4/4] use vanna vectordb with other llms
---
notebooks/bigquery-openai-azure-chromadb.ipynb | 2 +-
notebooks/bigquery-openai-azure-other-vectordb.ipynb | 2 +-
notebooks/bigquery-openai-azure-vannadb.ipynb | 2 +-
notebooks/bigquery-openai-standard-chromadb.ipynb | 2 +-
notebooks/bigquery-openai-standard-other-vectordb.ipynb | 2 +-
notebooks/bigquery-openai-standard-vannadb.ipynb | 2 +-
notebooks/bigquery-openai-vanna-chromadb.ipynb | 2 +-
notebooks/bigquery-openai-vanna-other-vectordb.ipynb | 2 +-
notebooks/bigquery-other-llm-chromadb.ipynb | 2 +-
notebooks/bigquery-other-llm-other-vectordb.ipynb | 2 +-
notebooks/bigquery-other-llm-vannadb.ipynb | 2 +-
notebooks/other-database-openai-azure-chromadb.ipynb | 2 +-
notebooks/other-database-openai-azure-other-vectordb.ipynb | 2 +-
notebooks/other-database-openai-azure-vannadb.ipynb | 2 +-
notebooks/other-database-openai-standard-chromadb.ipynb | 2 +-
notebooks/other-database-openai-standard-other-vectordb.ipynb | 2 +-
notebooks/other-database-openai-standard-vannadb.ipynb | 2 +-
notebooks/other-database-openai-vanna-chromadb.ipynb | 2 +-
notebooks/other-database-openai-vanna-other-vectordb.ipynb | 2 +-
notebooks/other-database-other-llm-chromadb.ipynb | 2 +-
notebooks/other-database-other-llm-other-vectordb.ipynb | 2 +-
notebooks/other-database-other-llm-vannadb.ipynb | 2 +-
notebooks/postgres-openai-azure-chromadb.ipynb | 2 +-
notebooks/postgres-openai-azure-other-vectordb.ipynb | 2 +-
notebooks/postgres-openai-azure-vannadb.ipynb | 2 +-
notebooks/postgres-openai-standard-chromadb.ipynb | 2 +-
notebooks/postgres-openai-standard-other-vectordb.ipynb | 2 +-
notebooks/postgres-openai-standard-vannadb.ipynb | 2 +-
notebooks/postgres-openai-vanna-chromadb.ipynb | 2 +-
notebooks/postgres-openai-vanna-other-vectordb.ipynb | 2 +-
notebooks/postgres-other-llm-chromadb.ipynb | 2 +-
notebooks/postgres-other-llm-other-vectordb.ipynb | 2 +-
notebooks/postgres-other-llm-vannadb.ipynb | 2 +-
notebooks/snowflake-openai-azure-chromadb.ipynb | 2 +-
notebooks/snowflake-openai-azure-other-vectordb.ipynb | 2 +-
notebooks/snowflake-openai-azure-vannadb.ipynb | 2 +-
notebooks/snowflake-openai-standard-chromadb.ipynb | 2 +-
notebooks/snowflake-openai-standard-other-vectordb.ipynb | 2 +-
notebooks/snowflake-openai-standard-vannadb.ipynb | 2 +-
notebooks/snowflake-openai-vanna-chromadb.ipynb | 2 +-
notebooks/snowflake-openai-vanna-other-vectordb.ipynb | 2 +-
notebooks/snowflake-other-llm-chromadb.ipynb | 2 +-
notebooks/snowflake-other-llm-other-vectordb.ipynb | 2 +-
notebooks/snowflake-other-llm-vannadb.ipynb | 2 +-
notebooks/sqlite-openai-azure-chromadb.ipynb | 2 +-
notebooks/sqlite-openai-azure-other-vectordb.ipynb | 2 +-
notebooks/sqlite-openai-azure-vannadb.ipynb | 2 +-
notebooks/sqlite-openai-standard-chromadb.ipynb | 2 +-
notebooks/sqlite-openai-standard-other-vectordb.ipynb | 2 +-
notebooks/sqlite-openai-standard-vannadb.ipynb | 2 +-
notebooks/sqlite-openai-vanna-chromadb.ipynb | 2 +-
notebooks/sqlite-openai-vanna-other-vectordb.ipynb | 2 +-
notebooks/sqlite-other-llm-chromadb.ipynb | 2 +-
notebooks/sqlite-other-llm-other-vectordb.ipynb | 2 +-
notebooks/sqlite-other-llm-vannadb.ipynb | 2 +-
55 files changed, 55 insertions(+), 55 deletions(-)
diff --git a/notebooks/bigquery-openai-azure-chromadb.ipynb b/notebooks/bigquery-openai-azure-chromadb.ipynb
index fb01b195..2f1cb670 100644
--- a/notebooks/bigquery-openai-azure-chromadb.ipynb
+++ b/notebooks/bigquery-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "37d5f6be-c475-58db-8892-2e893cde74b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n
Which LLM do you want to use?
\n\n "}, {"id": "96e88da1-27bf-55c0-a07a-6734ac70a45d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f439e467-8402-5423-9822-318c50b4831c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8263890c-aab4-5c63-abdd-bee76e425376", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "96e88da1-27bf-55c0-a07a-6734ac70a45d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "b9cb2809-a933-5cab-a0bb-bf20e4c8aa6d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-other-vectordb.ipynb b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
index 1f05c42e..5d0d845f 100644
--- a/notebooks/bigquery-openai-azure-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6c879303-e956-5806-a556-61e77a90db50", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "e091ee15-bdd4-5e5e-a449-273559ead5bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "35185a9e-35a3-56fe-b403-fe55f23a645e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "3268a082-6df7-5f38-9b61-346896359675", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "e091ee15-bdd4-5e5e-a449-273559ead5bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "cc10852e-5853-5c8c-a6bd-0561797b1386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-azure-vannadb.ipynb b/notebooks/bigquery-openai-azure-vannadb.ipynb
index 34fcb6ed..07cf0a80 100644
--- a/notebooks/bigquery-openai-azure-vannadb.ipynb
+++ b/notebooks/bigquery-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "356b6fdb-eea5-5e81-8a4b-9037b24db0ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0352a46c-ea13-5ed1-95d3-8cbc90da996e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "fac5b46f-4116-58e0-889d-969818e7888b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "356b6fdb-eea5-5e81-8a4b-9037b24db0ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0352a46c-ea13-5ed1-95d3-8cbc90da996e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "c5473333-5768-5fb8-9247-e26ba37fbf98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-chromadb.ipynb b/notebooks/bigquery-openai-standard-chromadb.ipynb
index 46d8eaf0..f7ac8a7b 100644
--- a/notebooks/bigquery-openai-standard-chromadb.ipynb
+++ b/notebooks/bigquery-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8b624c41-923d-5132-a672-b6b23cfb5665", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "225b22cd-cc83-592f-8aa9-e7222d40c4fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "774bf429-d57a-589e-a818-f746cfbd1333", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "85aaf126-0a09-5638-873c-7c8a69ab06d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "225b22cd-cc83-592f-8aa9-e7222d40c4fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "290f762c-bf5d-5c45-b58c-a95a645231f8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,bigquery]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "905acd30-8700-5d7e-8602-dd760e75e5df", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-other-vectordb.ipynb b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
index 0482829a..f2a9c2ee 100644
--- a/notebooks/bigquery-openai-standard-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cfcd647c-643f-50c3-901c-97a155d21e52", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "f5dde447-1b11-5e43-af09-d4a5345bacb1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e613d63a-b713-506f-8fb3-41b2c0a8e863", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "06452bd9-d462-5934-9d48-7bcbe1fa7424", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "f5dde447-1b11-5e43-af09-d4a5345bacb1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "2718a3d3-183c-50b5-b959-be79bd3b071f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-standard-vannadb.ipynb b/notebooks/bigquery-openai-standard-vannadb.ipynb
index d8cf8c78..e1b632bf 100644
--- a/notebooks/bigquery-openai-standard-vannadb.ipynb
+++ b/notebooks/bigquery-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eec50e07-2412-577f-9932-4a6e1e82720e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "2475ecc1-295b-55a3-86b7-9b851bec073e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eec50e07-2412-577f-9932-4a6e1e82720e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "da558514-b83a-5046-9a4d-f414e5bdc0f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "85007695-f172-57f7-8dd4-6f7db27f2633", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,bigquery]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "29194c94-ffb6-5db6-a6f8-18a9ea63e8da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-chromadb.ipynb b/notebooks/bigquery-openai-vanna-chromadb.ipynb
index ae3eebf9..51e84f6f 100644
--- a/notebooks/bigquery-openai-vanna-chromadb.ipynb
+++ b/notebooks/bigquery-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "9868a22d-dde5-5db9-9d20-313adfc566bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "6acfa978-70fd-5fc0-b5cc-1f08e5f9fa2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "b0e02375-2dab-5455-ba4e-e7e35ef85359", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Implement this"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "525bec9c-32bd-5fc9-b211-7909d17a700e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "9868a22d-dde5-5db9-9d20-313adfc566bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "6acfa978-70fd-5fc0-b5cc-1f08e5f9fa2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "4b45f470-52c3-5551-b0f5-28a169a2417c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
index 64e70bb1..0f82bb1d 100644
--- a/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/bigquery-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1c681249-9362-583b-88d4-d95605fb5a56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "323a83c6-fff4-503f-bf6e-f67d82ebff62", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "987b3c74-035d-562b-9288-167f1a027019", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1c681249-9362-583b-88d4-d95605fb5a56", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "323a83c6-fff4-503f-bf6e-f67d82ebff62", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "cc057755-c7d2-5e29-8f5d-1c8a5627898b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-chromadb.ipynb b/notebooks/bigquery-other-llm-chromadb.ipynb
index 5af9e7e9..2f263a85 100644
--- a/notebooks/bigquery-other-llm-chromadb.ipynb
+++ b/notebooks/bigquery-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "c4f8ab83-b95b-5fbd-8da9-0e093100938a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "b537adb9-0ba8-5d4a-b325-e517c6abc2e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "670a54ff-01bf-5be5-bbd3-b8f7b39b67c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "aa67476e-d08b-5d53-bfba-64361994e30d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "b537adb9-0ba8-5d4a-b325-e517c6abc2e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "93b3d255-1a25-5088-af9e-25168efe4f8a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,bigquery]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "aa232add-2e37-50a8-9e80-c1b5d8a98346", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-other-vectordb.ipynb b/notebooks/bigquery-other-llm-other-vectordb.ipynb
index 01b18a09..1a821534 100644
--- a/notebooks/bigquery-other-llm-other-vectordb.ipynb
+++ b/notebooks/bigquery-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2e2c927a-e54d-57b0-9a08-b1234caf16f5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "36ad2412-992c-59fc-8b71-ca4d08ce4853", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "329a34b6-ac61-5e7f-bd53-4dea75316751", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "4a5c0082-e776-51f7-800f-e2a5d7e427bd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "36ad2412-992c-59fc-8b71-ca4d08ce4853", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "874380de-32c3-5bfc-b26b-3a4d227543fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/bigquery-other-llm-vannadb.ipynb b/notebooks/bigquery-other-llm-vannadb.ipynb
index 9583ef51..ff47fc2a 100644
--- a/notebooks/bigquery-other-llm-vannadb.ipynb
+++ b/notebooks/bigquery-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8a869652-b05a-500c-bbdd-eba7206c348d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "9a2db276-d7f0-588e-87bb-33a1a6a0277d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "dab5efa9-fc30-5f4d-ae12-aca6cf81438d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for BigQuery using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8a869652-b05a-500c-bbdd-eba7206c348d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "9a2db276-d7f0-588e-87bb-33a1a6a0277d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "c6271b0c-191a-5055-aa85-aadb291fd909", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[bigquery]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "f74a2155-0435-5765-a95e-f89dc789d8f1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "45b52fa3-74ea-5bb0-82e5-518731dbe674", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_bigquery(project_id='my-project')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-chromadb.ipynb b/notebooks/other-database-openai-azure-chromadb.ipynb
index cb6c5968..c09bc4bc 100644
--- a/notebooks/other-database-openai-azure-chromadb.ipynb
+++ b/notebooks/other-database-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0459f271-cc86-5cbe-8ba8-90df5c33a6ff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7acad14f-3030-5462-8cbb-7fcd09e23702", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "92ea27dc-5881-5eb6-93c3-455e04899d35", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "ac054eb1-0e41-5fb8-b113-8f5917e90af2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7acad14f-3030-5462-8cbb-7fcd09e23702", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "9f04a063-5398-5e44-ae93-cdde14aee529", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-other-vectordb.ipynb b/notebooks/other-database-openai-azure-other-vectordb.ipynb
index 931ec5be..1f36dd59 100644
--- a/notebooks/other-database-openai-azure-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b1b5651a-bf7a-5f39-a3c7-5ac592f70818", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1c929444-5496-51a1-a50d-573bf8f8bf42", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "28e43ee2-89a3-5c6e-972c-18e14187ecbc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "adac2cff-cac2-50b1-a3bc-fa33c666c4f3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1c929444-5496-51a1-a50d-573bf8f8bf42", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "06e1d645-97b3-5338-b39a-ed29e0adae10", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-azure-vannadb.ipynb b/notebooks/other-database-openai-azure-vannadb.ipynb
index 0ef03c87..cc5753ec 100644
--- a/notebooks/other-database-openai-azure-vannadb.ipynb
+++ b/notebooks/other-database-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a67dcbda-66e8-5475-90f3-0af5f99a50be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "501eb87f-96cc-572a-8f3f-c750b45f9a2b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "c84b946c-c10e-5f4d-b0e9-1053cac2d9c0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a67dcbda-66e8-5475-90f3-0af5f99a50be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "501eb87f-96cc-572a-8f3f-c750b45f9a2b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "5b6052f4-cae6-5e27-bb45-7e1c0aa43386", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-chromadb.ipynb b/notebooks/other-database-openai-standard-chromadb.ipynb
index affc09cd..74dc2c4f 100644
--- a/notebooks/other-database-openai-standard-chromadb.ipynb
+++ b/notebooks/other-database-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f7fa0ec5-25cf-59e0-bf1a-78d844dfdbe0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "4bcef569-d644-5f3c-917a-8310f43644d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "deeb1a83-b623-52c8-9e9c-8fde4842d65e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b48e56e0-fa9c-57b1-bb0f-a883627b9b6a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "4bcef569-d644-5f3c-917a-8310f43644d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "f79e1f30-941b-5975-b947-e84bba787d9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-other-vectordb.ipynb b/notebooks/other-database-openai-standard-other-vectordb.ipynb
index 2f6b6e78..204ab0f7 100644
--- a/notebooks/other-database-openai-standard-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "353a45f8-727b-5a6a-a975-df3903f9963f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "29473aa9-8149-5272-bf3b-e498edbcc2e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "83e6caa0-808d-5e3f-a3bc-dd6055253309", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f75f42eb-b92e-5d53-bed3-c78023dd83e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "29473aa9-8149-5272-bf3b-e498edbcc2e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "863e74db-ce0d-5c2b-abcf-a57a0583b638", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-standard-vannadb.ipynb b/notebooks/other-database-openai-standard-vannadb.ipynb
index a16796a7..07c0ea69 100644
--- a/notebooks/other-database-openai-standard-vannadb.ipynb
+++ b/notebooks/other-database-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dad326e9-00ee-5e09-b5e4-53dfcf50fd98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "46dcb449-a2c9-571d-a7ce-d6450eb19571", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dad326e9-00ee-5e09-b5e4-53dfcf50fd98", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "805bcd0a-a97d-55f7-836f-9df9ced3fad4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "00c06f03-db34-5b90-9f5e-d3836f69d656", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-chromadb.ipynb b/notebooks/other-database-openai-vanna-chromadb.ipynb
index 54ee96bc..de1edee1 100644
--- a/notebooks/other-database-openai-vanna-chromadb.ipynb
+++ b/notebooks/other-database-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dc98c84d-aefb-58a2-9cfb-212728645d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "317fde73-6c98-5e93-86b4-3ccb542cd831", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "b0e02375-2dab-5455-ba4e-e7e35ef85359", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Implement this"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "0137feba-b7f4-52f8-985b-c86be0e0f5bb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dc98c84d-aefb-58a2-9cfb-212728645d08", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "317fde73-6c98-5e93-86b4-3ccb542cd831", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "2ad31f21-0a91-5647-8f68-469156d9f90c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-openai-vanna-other-vectordb.ipynb b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
index e24da77d..e2d96be9 100644
--- a/notebooks/other-database-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/other-database-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a25ae5b7-8434-5986-ac9d-fbbd97e18339", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "5789a6a3-f0b7-5afb-95d7-f46ebc0ca1eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "dea72209-4768-5f52-ba75-ae7671cc46fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a25ae5b7-8434-5986-ac9d-fbbd97e18339", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "5789a6a3-f0b7-5afb-95d7-f46ebc0ca1eb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "43195316-d36c-517e-a899-130dc36e8acd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-chromadb.ipynb b/notebooks/other-database-other-llm-chromadb.ipynb
index 5dcb669f..45a0ab23 100644
--- a/notebooks/other-database-other-llm-chromadb.ipynb
+++ b/notebooks/other-database-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dfb6d6f6-3e37-5ac0-9d43-ed8245420149", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1ac99bf9-08b8-5d77-82ee-7416409862ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "7b709983-57b5-5bc7-940a-bcbf832468c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "23f36fb4-5f32-5299-b982-3eb907384c18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1ac99bf9-08b8-5d77-82ee-7416409862ce", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "10714cee-02a9-5d1e-aa5a-841894519801", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-other-vectordb.ipynb b/notebooks/other-database-other-llm-other-vectordb.ipynb
index 1493003e..bd7df920 100644
--- a/notebooks/other-database-other-llm-other-vectordb.ipynb
+++ b/notebooks/other-database-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b2ec90c7-3a2b-5c1a-b490-226d5801373e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "655215da-b934-5419-a73a-9bf4b77e96d8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "d4a7bd4f-e238-5168-8c26-03ab7d9b6ceb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2f4ef27d-7fa0-542e-8ced-e0868950fc7e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "655215da-b934-5419-a73a-9bf4b77e96d8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "14a246cb-cc7e-5bb1-b931-cf58d8c86b53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/other-database-other-llm-vannadb.ipynb b/notebooks/other-database-other-llm-vannadb.ipynb
index 23ff1354..9a8bd1db 100644
--- a/notebooks/other-database-other-llm-vannadb.ipynb
+++ b/notebooks/other-database-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b907358f-58aa-52bb-88bd-05ad52607b83", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "a6970aff-d479-566f-8fd4-4cd44851ce28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "8b253300-a4e8-5b94-8ef5-958eefde2756", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Other Database using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b907358f-58aa-52bb-88bd-05ad52607b83", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "a6970aff-d479-566f-8fd4-4cd44851ce28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "d2df2e38-f9f0-5483-9181-59cefa62c124", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n \n \n
\n \n - \n \n
\n
[Selected] Other Database
\n
Use Vanna to generate queries for any SQL database\n
\n \n \n \n
\n "}, {"id": "9b30d7e1-f279-5b6a-a620-fc988020efcc", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nimport pandas as pd\n\n# There's usually a library for connecting to your type of database. Any SQL database will work here -- you just have to use the right library.\nconn_details = {...} # fill this with your connection details\nconn = ... # fill this with your connection object\n\n# You define a function that takes in a SQL query as a string and returns a pandas dataframe\ndef run_sql(sql: str) -> pd.DataFrame:\n df = pd.read_sql_query(sql, conn)\n return df\n\n# This gives the package a function that it can use to run the SQL\nvn.run_sql = run_sql\nvn.run_sql_is_set = True\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. 
Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. 
\nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-chromadb.ipynb b/notebooks/postgres-openai-azure-chromadb.ipynb
index 0525f5b5..a287d044 100644
--- a/notebooks/postgres-openai-azure-chromadb.ipynb
+++ b/notebooks/postgres-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "272c4208-2cd5-5eba-9279-1debf6509bbd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7a79bcc1-c725-5378-bb13-75291ac67f99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e7cd5976-e784-52c5-be86-b454ffa806c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "9facb873-2e7b-54ed-8d7b-8054ee2b1709", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7a79bcc1-c725-5378-bb13-75291ac67f99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "125a30f1-60b6-5ad5-a154-40363adc38d1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-other-vectordb.ipynb b/notebooks/postgres-openai-azure-other-vectordb.ipynb
index 55f0f11e..d74c9f80 100644
--- a/notebooks/postgres-openai-azure-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f03f9508-51ff-5f9c-b134-922d2121265c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "46f63606-ab8f-5d12-b7a2-ca467c1921e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6c5bdd16-d84b-527f-a805-d01f53e7337c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "7c798bf5-b9ff-52c6-9317-467ed5d44b68", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "46f63606-ab8f-5d12-b7a2-ca467c1921e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "e11da77b-6e86-53a2-9c7b-700c2550921e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-azure-vannadb.ipynb b/notebooks/postgres-openai-azure-vannadb.ipynb
index 58a04581..f22ac3f1 100644
--- a/notebooks/postgres-openai-azure-vannadb.ipynb
+++ b/notebooks/postgres-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cff68f11-eb15-5e47-a761-a69d6110e63e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3f5adb23-3700-54cb-8072-9e953b9c5273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "778e258a-8586-5d24-bde4-0a1a6361f6a9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "cff68f11-eb15-5e47-a761-a69d6110e63e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3f5adb23-3700-54cb-8072-9e953b9c5273", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "6aed1fd9-7ffe-503a-bd0d-ee0fb914aeff", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-chromadb.ipynb b/notebooks/postgres-openai-standard-chromadb.ipynb
index 91c9ffaf..5c9fa300 100644
--- a/notebooks/postgres-openai-standard-chromadb.ipynb
+++ b/notebooks/postgres-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "20819ef8-ae2e-531c-9c70-2e57e3276608", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3cfc3d9c-9ca0-55b4-9a78-7c033b5a5bf0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "95b3f23b-49e5-5f79-bedc-f3e5dea0f14f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "a6516065-f98b-5674-ba9e-232c3fd9a992", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3cfc3d9c-9ca0-55b4-9a78-7c033b5a5bf0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "30b351fd-e454-5983-b4aa-a522759a1dbe", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,postgres]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "69b47dc7-f766-59c7-8f50-3390f2044c0e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-other-vectordb.ipynb b/notebooks/postgres-openai-standard-other-vectordb.ipynb
index 7148d884..7d481e69 100644
--- a/notebooks/postgres-openai-standard-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "bbdfdbfb-96e5-5afb-9c7e-4a3e8d1b88a5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "bca6a0ac-79c7-59c7-8b54-1cba19c49b91", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "ad6a7ca6-de11-5e6c-accf-b908b3b5f536", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6f847725-3387-5ed0-ab50-c42e58a31c0d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "bca6a0ac-79c7-59c7-8b54-1cba19c49b91", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "b3cf62dc-7c47-56f5-8dd9-65fc75597155", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-standard-vannadb.ipynb b/notebooks/postgres-openai-standard-vannadb.ipynb
index 6e8e70bc..7d9f82fe 100644
--- a/notebooks/postgres-openai-standard-vannadb.ipynb
+++ b/notebooks/postgres-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6f1aa45b-9c5c-5514-96a4-01cd2a4bc923", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "bd885f8c-8938-5a71-826f-6c4000c70508", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6f1aa45b-9c5c-5514-96a4-01cd2a4bc923", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ee38d28e-9586-5360-a872-95655441a9d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "5629279e-c760-54d5-8a49-05e0838d3a07", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,postgres]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "c6ccc82b-147a-5e2e-937d-74a8dc8c5582", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-chromadb.ipynb b/notebooks/postgres-openai-vanna-chromadb.ipynb
index c65793c4..27b265e5 100644
--- a/notebooks/postgres-openai-vanna-chromadb.ipynb
+++ b/notebooks/postgres-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d36ef7f7-0b29-5253-bb56-78b79ab74e53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "24cf5a7e-e723-58d1-ad4e-b28ead2724aa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "b0e02375-2dab-5455-ba4e-e7e35ef85359", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Implement this"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "df07813d-72a1-5452-8dc9-f6894538a24b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "d36ef7f7-0b29-5253-bb56-78b79ab74e53", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "24cf5a7e-e723-58d1-ad4e-b28ead2724aa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "febd7f76-f6f4-570d-a21f-a522c19a44dd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-openai-vanna-other-vectordb.ipynb b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
index 3bae8bad..1357282f 100644
--- a/notebooks/postgres-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/postgres-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eedd396b-ecb5-55a8-b006-4387078a5045", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0bd9a7e5-7502-5050-a738-fca444680f71", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "c6b9a340-0204-5267-b461-47450cdb8a2d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eedd396b-ecb5-55a8-b006-4387078a5045", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0bd9a7e5-7502-5050-a738-fca444680f71", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "ee11dcde-9152-5e53-860a-bb72016db15b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-chromadb.ipynb b/notebooks/postgres-other-llm-chromadb.ipynb
index 62e498cf..b7056d88 100644
--- a/notebooks/postgres-other-llm-chromadb.ipynb
+++ b/notebooks/postgres-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6893c85d-c990-5a7d-b2d4-0ef5629d6456", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "9c8da62e-70bb-5a3b-b468-25c7275aa943", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "30e88248-26ea-53ad-93a6-9b3d5da41033", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "bc051b84-36f0-5b70-b526-2523444ce4fd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "9c8da62e-70bb-5a3b-b468-25c7275aa943", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "3642397b-e00d-58d8-8500-e501ec0f7e4e", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,postgres]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "2436d4ef-a066-5584-b97d-9696c8bbb9ec", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-other-vectordb.ipynb b/notebooks/postgres-other-llm-other-vectordb.ipynb
index 467b303a..52e2ca4d 100644
--- a/notebooks/postgres-other-llm-other-vectordb.ipynb
+++ b/notebooks/postgres-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "64c36d45-b45c-52be-afbe-47438ae52543", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "12091978-155e-5893-843f-42a69071be9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "3a23b994-dde6-5290-a4ae-5c0fbc8143d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e44713bb-5ba1-5a68-9d1b-9b8ee0b66b75", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "12091978-155e-5893-843f-42a69071be9d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "8af744f9-5cb5-5a01-b58b-d736caac0164", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/postgres-other-llm-vannadb.ipynb b/notebooks/postgres-other-llm-vannadb.ipynb
index cd10bc4b..e9fcdde7 100644
--- a/notebooks/postgres-other-llm-vannadb.ipynb
+++ b/notebooks/postgres-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "19d8bfa9-cb21-5f99-8fd1-e23bdc5456b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "14feae0c-c763-54d1-bf1e-a2b013e18ddd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "cd757483-4d52-5a68-94b5-0244087a9cdb", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Postgres using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "19d8bfa9-cb21-5f99-8fd1-e23bdc5456b1", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "14feae0c-c763-54d1-bf1e-a2b013e18ddd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "bdef8f4c-bd17-56af-8840-6452768ea0f3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[postgres]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model='my-model', vanna_api_key='my-vanna-api-key', config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "7b97e4ca-e994-5cf9-a09a-36a8e12f6abd", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "38707ef4-8063-5170-873a-45e63c1928a7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_postgres(host='my-host', dbname='my-dbname', user='my-user', password='my-password', port='my-port')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-chromadb.ipynb b/notebooks/snowflake-openai-azure-chromadb.ipynb
index 645b7c1f..a5dde82a 100644
--- a/notebooks/snowflake-openai-azure-chromadb.ipynb
+++ b/notebooks/snowflake-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "6c47acbd-6fdd-51d5-8594-e76200e78c68", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0d342fd1-a5ff-5d7f-86c4-cad506f84ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6287c685-ff0c-5a75-a58f-fd9d47b14fc3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "74d715d6-7ece-5bdd-aea9-8384950b3800", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "0d342fd1-a5ff-5d7f-86c4-cad506f84ae4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "cd3454f0-9566-5b7b-8e28-ca637b1c3964", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-other-vectordb.ipynb b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
index ed05f4d2..082230e0 100644
--- a/notebooks/snowflake-openai-azure-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5b42aa31-8015-5683-9520-40f573312237", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "44bc4304-7303-5efc-8b46-3c795d4758fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "6aac9f07-876b-5289-9f4e-7524132617ad", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "eef234eb-aee8-541f-b32f-f8bde6f23759", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "44bc4304-7303-5efc-8b46-3c795d4758fa", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "a90a47b5-ed37-5e9a-abfb-044d3b6938e2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-azure-vannadb.ipynb b/notebooks/snowflake-openai-azure-vannadb.ipynb
index 001c8d0a..32f0ed22 100644
--- a/notebooks/snowflake-openai-azure-vannadb.ipynb
+++ b/notebooks/snowflake-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "fb1e5992-1b48-5122-bc78-0d07ebc6a5c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "f87cecbf-d98e-5a64-a354-715ab3da3c36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "813e792b-6025-521d-acaf-cc20cbd83c99", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "fb1e5992-1b48-5122-bc78-0d07ebc6a5c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "f87cecbf-d98e-5a64-a354-715ab3da3c36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "ab74636a-a65f-5a2d-bca2-b59efd9b8e18", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-chromadb.ipynb b/notebooks/snowflake-openai-standard-chromadb.ipynb
index cab6ecf5..e1f478d7 100644
--- a/notebooks/snowflake-openai-standard-chromadb.ipynb
+++ b/notebooks/snowflake-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b7a98b02-da5b-50c2-b98e-839144dcec4b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "a8ac9a78-92a7-56dd-8352-c00f6df060ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "75481477-4412-582b-aba7-52183e26f37c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "0735cc09-44bd-5162-bf7a-1e92ec717546", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "a8ac9a78-92a7-56dd-8352-c00f6df060ab", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "216dcd51-21ea-5740-a271-269aa81f38e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai,snowflake]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "6272d9bb-f7dd-5b43-8db4-64817b295df0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-other-vectordb.ipynb b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
index 83d1b86f..d73fd65f 100644
--- a/notebooks/snowflake-openai-standard-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8467844f-7a92-5683-9669-98ff96836389", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "d80b2999-7fe1-5d05-9db4-bc257c081a7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "a53aec8f-5a05-50f3-9209-7bbf1dca9e6f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8428c985-6bf9-5c87-9d34-56ae18e6f776", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "d80b2999-7fe1-5d05-9db4-bc257c081a7c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "8bbc71c3-929d-5119-9d69-63fa678c23a0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-standard-vannadb.ipynb b/notebooks/snowflake-openai-standard-vannadb.ipynb
index 1683dc15..f153ac3e 100644
--- a/notebooks/snowflake-openai-standard-vannadb.ipynb
+++ b/notebooks/snowflake-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcb1bb48-bc00-5b3f-92c3-0fe0d671c153", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f13605be-e49c-5be3-87bc-2a90efb57306", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcb1bb48-bc00-5b3f-92c3-0fe0d671c153", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "8fb7581d-c7bf-55d2-9df9-94635c47f261", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "0f72722e-743a-5317-9c08-564f18d4f8ef", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai,snowflake]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "2ebd09c1-87dd-501f-a64a-f1404386b4f0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-chromadb.ipynb b/notebooks/snowflake-openai-vanna-chromadb.ipynb
index a606d63a..faa822ad 100644
--- a/notebooks/snowflake-openai-vanna-chromadb.ipynb
+++ b/notebooks/snowflake-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8394c9a-5605-50ad-8250-3ab96af74425", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ecdf606b-37b1-57e1-a8c1-39ec21da67f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "b0e02375-2dab-5455-ba4e-e7e35ef85359", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Implement this"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e493593c-e8c4-5cdd-bdb8-e5d8bb39b0c6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f8394c9a-5605-50ad-8250-3ab96af74425", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ecdf606b-37b1-57e1-a8c1-39ec21da67f2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "fdfb6c1f-9b01-5034-9aa5-75cd2e199d28", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
index 70ae8b3e..0b04bb2c 100644
--- a/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/snowflake-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "63dd4380-37f0-5d63-b20e-7373b1487925", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "bb99a675-f2f6-5840-9f13-13982b887387", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "42212054-35d6-56f3-be8e-47af265d9df9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "63dd4380-37f0-5d63-b20e-7373b1487925", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "bb99a675-f2f6-5840-9f13-13982b887387", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "deada75c-50bd-5cd8-95ff-e187e794d45e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-chromadb.ipynb b/notebooks/snowflake-other-llm-chromadb.ipynb
index 516ae02d..361a0400 100644
--- a/notebooks/snowflake-other-llm-chromadb.ipynb
+++ b/notebooks/snowflake-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "031d8b36-d1dc-5493-9baf-67c943d3eb45", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "765e0228-2970-5cd0-b71c-7880112c121b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "f755ebca-fde7-5eb1-802d-2c830e0b6282", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "945069a0-5611-5b19-abc7-025b5c4ab63b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "765e0228-2970-5cd0-b71c-7880112c121b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b20231ea-3913-5a89-8c43-2e14b2173112", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,snowflake]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "fbd8aa4c-3dcb-52af-90a7-f358238c13b6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-other-vectordb.ipynb b/notebooks/snowflake-other-llm-other-vectordb.ipynb
index 2990cecb..cdcb05b1 100644
--- a/notebooks/snowflake-other-llm-other-vectordb.ipynb
+++ b/notebooks/snowflake-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e98df422-322d-59b5-b3d1-0dbcff2d2d93", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1577379b-aaa2-5b38-b4de-eeb16e98bcaf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "cb9146eb-a0a0-5e0f-82b7-59e92f069498", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "dcfb5b01-ae11-5d69-a537-5994e61953c8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "1577379b-aaa2-5b38-b4de-eeb16e98bcaf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "c1fd8a1f-0db6-5067-af77-776edcb4ed06", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/snowflake-other-llm-vannadb.ipynb b/notebooks/snowflake-other-llm-vannadb.ipynb
index dbdfb7b8..badedf52 100644
--- a/notebooks/snowflake-other-llm-vannadb.ipynb
+++ b/notebooks/snowflake-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "15b23da4-cc7e-595d-a5f7-822a32b0f4e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "76440ab2-1a0b-5d31-a223-58bc3bd7a8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "5dcc604a-caa9-5f6d-ab0e-3f417516c076", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for Snowflake using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "15b23da4-cc7e-595d-a5f7-822a32b0f4e8", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "76440ab2-1a0b-5d31-a223-58bc3bd7a8fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "188e4729-c712-598c-a264-482bcf0f552c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[snowflake]'"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "4442a4e5-f9cb-5f37-bf17-c51c08b5a35b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "392c4dd9-cfbd-5d5d-8346-78041426ee08", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn.connect_to_snowflake(\n account=\"myaccount\",\n username=\"myusername\",\n password=\"mypassword\",\n database=\"mydatabase\",\n role=\"myrole\",\n)\n"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-chromadb.ipynb b/notebooks/sqlite-openai-azure-chromadb.ipynb
index afd5b4b7..15dd4932 100644
--- a/notebooks/sqlite-openai-azure-chromadb.ipynb
+++ b/notebooks/sqlite-openai-azure-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "74475edd-623b-5aee-b21f-65297f7ef6bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "af9b8e47-251a-57c4-bbf8-b44aff8fd7d4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "262d99dc-b1f5-52a2-b3dd-c959896d40d0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "028d9db5-f0e6-5b2e-a8ff-59aa1c44d06a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "af9b8e47-251a-57c4-bbf8-b44aff8fd7d4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "93b5ab2b-834b-5b86-8d47-c9beda8b3544", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass LocalContext_OpenAI(ChromaDB_VectorStore, OpenAI_Chat):\n    def __init__(self, config=None):\n        ChromaDB_VectorStore.__init__(self, config=config)\n        OpenAI_Chat.__init__(self, config=config)\n\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "860d03fb-aa03-53d2-b703-3ca6f77232b0", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-other-vectordb.ipynb b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
index 0f95b5ef..5b8829a4 100644
--- a/notebooks/sqlite-openai-azure-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-azure-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "e24bd0db-f7db-5c69-82db-d739466e1c1e", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "80ef7877-e065-5927-8d3d-3c7a24883e17", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "48659c68-dbb3-5f89-a750-75a3bd1d4872", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1fa03e27-468c-521b-8b1f-a2f02afbc8f7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "80ef7877-e065-5927-8d3d-3c7a24883e17", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "7ac5fa06-681a-5c2f-abdc-8541cfe6f770", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-azure-vannadb.ipynb b/notebooks/sqlite-openai-azure-vannadb.ipynb
index e6235a1f..8e062179 100644
--- a/notebooks/sqlite-openai-azure-vannadb.ipynb
+++ b/notebooks/sqlite-openai-azure-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1eee9e17-3f72-51f9-9522-da40b200cd94", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "c919366c-b0da-5326-9b4f-a5e5ee71b7be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "65aecd1f-6612-5cfc-93b3-38d8f1de93dd", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nvn = LocalContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "79b5f2a1-f4ac-5c48-9451-4d4c5e9bbb4d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Azure OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "1eee9e17-3f72-51f9-9522-da40b200cd94", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "c919366c-b0da-5326-9b4f-a5e5ee71b7be", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "d3f4d823-ad6c-575a-9797-666a5dc2b4d0", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI(config={\n \"api_type\": \"azure\",\n \"api_base\": \"https://...\",\n \"api_version\": \"2023-05-15\",\n \"engine\": \"YOUR_ENGINE_HERE\",\n \"api_key\": \"sk-...\"\n})\n"}, {"id": "e3bfcfae-1df1-5e15-a4f1-e49fa8aed61b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-chromadb.ipynb b/notebooks/sqlite-openai-standard-chromadb.ipynb
index 53971015..d53bb6b4 100644
--- a/notebooks/sqlite-openai-standard-chromadb.ipynb
+++ b/notebooks/sqlite-openai-standard-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "fa9a8c34-d75e-53c5-9559-ca0aee4cf665", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ba1444db-44d3-5ca7-8461-308b312e053f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "af8cd30a-415d-5ac9-9511-853d099fca5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8c7fce83-a9d6-5846-9f59-6217ea40d3e9", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "ba1444db-44d3-5ca7-8461-308b312e053f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "d6e3ecc5-3c05-518b-8285-cf9dbf06ec58", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb,openai]'"}, {"id": "381badf7-67a1-5d18-ad08-30ada1767adf", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.local import LocalContext_OpenAI\n"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "70963293-29e6-57c4-95f1-b5045d63a75b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-other-vectordb.ipynb b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
index d504e608..39670bda 100644
--- a/notebooks/sqlite-openai-standard-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-standard-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "92bec11d-c535-559c-ab1b-295a4227fba4", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3da5bae9-ef50-5164-9b90-2eeb8c07e96f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "8fedd282-2590-569a-93ce-ab6c6a4fa48a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "97ad1de6-1fb0-5c04-8378-6c21db0447da", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "3da5bae9-ef50-5164-9b90-2eeb8c07e96f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "fe1b5686-8226-53d5-b42b-a29cdb088cad", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.base import VannaBase\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "aac12e94-b008-55dc-af6f-3199c9b8fb36", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-standard-vannadb.ipynb b/notebooks/sqlite-openai-standard-vannadb.ipynb
index 71611d18..2583bc2d 100644
--- a/notebooks/sqlite-openai-standard-vannadb.ipynb
+++ b/notebooks/sqlite-openai-standard-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "32bc8aa3-e5a6-575b-bbb4-aaed33dd641f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "49556b54-d07a-5de6-ac73-bde835c227f6", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.local import LocalContext_OpenAI"}, {"id": "d421deda-8a4c-587b-9eac-4aa127875046", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn = LocalContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n# vn = LocalContext_OpenAI({\"api_key\": \"sk-...\", \"model\": \"gpt-4-...\"}) # If you want to use a specific OpenAI model\n "}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "078f7efe-c23e-5d4c-98ee-a1d3e014992f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "32bc8aa3-e5a6-575b-bbb4-aaed33dd641f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "394e0f93-821f-5ba7-9cf5-4574fbb026bc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "925749d7-7c6c-5599-a063-ad2cad7b52ab", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[openai]'"}, {"id": "4ff1aaee-1154-5859-b8c3-93ac3c31595d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.vannadb.vannadb_vector import VannaDB_VectorStore\n"}, {"id": "43b24c20-9cca-55be-8f1e-5cdbff80c916", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass RemoteContext_OpenAI(VannaDB_VectorStore, OpenAI_Chat):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n OpenAI_Chat.__init__(self, config=config)\n\nvn = RemoteContext_OpenAI({\"api_key\": \"sk-...\"}) # This is your OpenAI API key\n\n"}, {"id": "2de3b0d3-71e4-5335-9e65-29c27aff8e1d", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-chromadb.ipynb b/notebooks/sqlite-openai-vanna-chromadb.ipynb
index afee69db..eb1bead7 100644
--- a/notebooks/sqlite-openai-vanna-chromadb.ipynb
+++ b/notebooks/sqlite-openai-vanna-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5d6f5c01-f5dc-5175-8812-f4cb8e18fb3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "63b0e89e-0abb-50e3-8485-70155d8aa30b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "b0e02375-2dab-5455-ba4e-e7e35ef85359", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Implement this"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "9d6519a8-e544-5523-82ce-97784be01264", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5d6f5c01-f5dc-5175-8812-f4cb8e18fb3a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "63b0e89e-0abb-50e3-8485-70155d8aa30b", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "c1e5ad61-57c7-5b64-920b-6f5b435df5e3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "13991486-a5af-5687-9351-5b4159aeb502", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
index 04019f6a..934d05fc 100644
--- a/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
+++ b/notebooks/sqlite-openai-vanna-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f60d2282-6489-597d-9427-ba74a7d299fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "da681ba4-1d8d-5ff4-be35-5803a677a21c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "506bdbf0-9097-5ea8-a3ac-fbbee8ce189c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using OpenAI via Vanna.AI (Recommended), Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "f60d2282-6489-597d-9427-ba74a7d299fc", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "da681ba4-1d8d-5ff4-be35-5803a677a21c", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "f5348d7a-0fae-5651-880e-1b365a3a1257", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-chromadb.ipynb b/notebooks/sqlite-other-llm-chromadb.ipynb
index d82352c8..e21ca4f3 100644
--- a/notebooks/sqlite-other-llm-chromadb.ipynb
+++ b/notebooks/sqlite-other-llm-chromadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "22626ab7-a949-5ee0-902f-83d9d7d0ec2f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "8b5cba50-be4a-5111-8bbf-c7235bdd38c2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "d3c0d01c-97e4-50e9-8e05-32bbed37f5d3", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, ChromaDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "8abf4215-90e0-58a1-868a-71e3e76fb0d5", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "8b5cba50-be4a-5111-8bbf-c7235bdd38c2", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "1a0086e2-0a57-5091-accd-456e4d3e4ad7", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install 'vanna[chromadb]'"}, {"id": "a70195e6-7c1f-519f-8413-4ad4e6b3570d", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\nfrom vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\n"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "b2854838-2902-59d3-b55b-bf59f3a4d888", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-other-vectordb.ipynb b/notebooks/sqlite-other-llm-other-vectordb.ipynb
index 4a758ed0..c2c9fc2e 100644
--- a/notebooks/sqlite-other-llm-other-vectordb.ipynb
+++ b/notebooks/sqlite-other-llm-other-vectordb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "5aa3e8a3-5e2c-5139-b2cd-4a77bf683067", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "e34c0da3-6b6a-5a04-aa97-d3dbcef9aec7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "a5fc40a5-c25e-5a5d-9969-9a7b29ecd8b8", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nfrom vanna.base import VannaBase\nfrom vanna.openai.openai_chat import OpenAI_Chat\nfrom vanna.openai.openai_embeddings import OpenAI_Embeddings\n\n"}, {"id": "ac5a59f9-512a-580e-b16c-35773408012a", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomVectorDB(VannaBase):\n def add_ddl(self, ddl: str, **kwargs) -> str:\n # Implement here\n\n def add_documentation(self, doc: str, **kwargs) -> str:\n # Implement here\n\n def add_question_sql(self, question: str, sql: str, **kwargs) -> str:\n # Implement here\n\n def get_related_ddl(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_related_documentation(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_similar_question_sql(self, question: str, **kwargs) -> list:\n # Implement here\n\n def get_training_data(self, **kwargs) -> pd.DataFrame:\n # Implement here\n\n def remove_training_data(id: str, **kwargs) -> bool:\n # Implement here\n\nclass MyCustomVectorDB_OpenAI(MyCustomVectorDB, OpenAI_Chat, OpenAI_Embeddings):\n def __init__(self, config=None):\n MyCustomVectorDB.__init__(self, config=config)\n OpenAI_Chat.__init__(self, config=config)\n"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "e7a07123-59ae-5e9d-9fc4-8606541246e6", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Other VectorDB\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "2150480b-b185-5dec-8ce6-febe910b1715", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "e34c0da3-6b6a-5a04-aa97-d3dbcef9aec7", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "35e4b991-0e84-561d-8c1e-979bb4252ec9", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.base import VannaBase\n"}, {"id": "fa163aab-7884-56cb-8b22-135272dc8975", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# TODO: Honestly, why are you even using the package at this point? You should be contributing to it!"}, {"id": "ef8ef020-2101-56df-8aa3-a09caae07971", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
diff --git a/notebooks/sqlite-other-llm-vannadb.ipynb b/notebooks/sqlite-other-llm-vannadb.ipynb
index 297079f6..4fb77d4b 100644
--- a/notebooks/sqlite-other-llm-vannadb.ipynb
+++ b/notebooks/sqlite-other-llm-vannadb.ipynb
@@ -1 +1 @@
-{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b1c0a0b1-77c4-55b4-8715-033185b75886", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7090c61a-c77d-565f-92d5-3faf38d8e9bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "f261a97e-dfd6-59bc-aaae-1d6ac8df2df1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore\nfrom vanna.base import VannaBase"}, {"id": "1359c7fc-3b38-5912-bab7-de38cc2f4885", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass LocalContext_MyLLM(ChromaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n ChromaDB_VectorStore.__init__(self, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = LocalContext_MyLLM()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, colume names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already laying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file
+{"cells": [{"id": "22fc9194-d307-554e-84c7-5c61665bd738", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "# Generating SQL for SQLite using Other LLM, Vanna Hosted Vector DB (Recommended)\nThis notebook runs through the process of using the `vanna` Python package to generate SQL using AI (RAG + LLMs) including connecting to a database and training. If you're not ready to train on your own database, you can still try it using a sample [SQLite database](getting-started.html)."}, {"id": "b1c0a0b1-77c4-55b4-8715-033185b75886", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich LLM do you want to use?
\n\n "}, {"id": "7090c61a-c77d-565f-92d5-3faf38d8e9bf", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhere do you want to store the 'training' data?
\n\n "}, {"id": "ee059407-58ac-50fa-843a-7b876328df13", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Setup"}, {"id": "b9b77362-c049-5500-b502-08811fcd4dce", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "%pip install vanna"}, {"id": "bfe31937-16c5-5ecb-9aea-0cc1b2aec53c", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "from vanna.vannadb.vannadb_vector import VannaDB_VectorStore\nfrom vanna.base import VannaBase\n"}, {"id": "8cb1a477-45fd-5153-b5c4-92c90f869df4", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nclass MyCustomLLM(VannaBase):\n def __init__(self, config=None):\n pass\n\n def generate_plotly_code(self, question: str = None, sql: str = None, df_metadata: str = None, **kwargs) -> str:\n # Implement here\n\n def generate_question(self, sql: str, **kwargs) -> str:\n # Implement here\n \n def get_followup_questions_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n \n def get_sql_prompt(self, question: str, question_sql_list: list, ddl_list: list, doc_list: list, **kwargs):\n # Implement here\n\n def submit_prompt(self, prompt, **kwargs) -> str:\n # Implement here\n\nclass RemoteContext_MyLLM(VannaDB_VectorStore, MyCustomLLM):\n def __init__(self, config=None):\n VannaDB_VectorStore.__init__(self, vanna_model=my-model, vanna_api_key=my-vanna-api-key, config=config)\n MyCustomLLM.__init__(self, config=config)\n\nvn = RemoteContext_MyLLM()\n"}, {"id": "b141daf0-a79d-59a4-811c-1626ba88d07a", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "\nWhich database do you want to query?
\n\n "}, {"id": "4bb60e4c-1036-5c5d-84c6-11c9f2e9c8d1", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.connect_to_sqlite('my-database.sqlite')"}, {"id": "f06c0e89-83f7-5ad1-8f6e-a64cf5bd8e60", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Training\nYou only need to train once. Do not train again unless you want to add more training data."}, {"id": "5d321d01-d66f-5c5e-a3f3-e2d3d4330344", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The information schema query may need some tweaking depending on your database. This is a good starting point.\ndf_information_schema = vn.run_sql(\"SELECT * FROM INFORMATION_SCHEMA.COLUMNS\")\n\n# This will break up the information schema into bite-sized chunks that can be referenced by the LLM\nplan = vn.get_training_plan_generic(df_information_schema)\nplan\n\n# If you like the plan, then uncomment this and run it to train\n# vn.train(plan=plan)\n\n"}, {"id": "7c421f88-42ea-567c-8581-3dcac96c36a3", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "\n# The following are methods for adding training data. Make sure you modify the examples to match your database.\n\n# DDL statements are powerful because they specify table names, column names, types, and potentially relationships\nvn.train(ddl=\"\"\"\n CREATE TABLE IF NOT EXISTS my-table (\n id INT PRIMARY KEY,\n name VARCHAR(100),\n age INT\n )\n\"\"\")\n\n# Sometimes you may want to add documentation about your business terminology or definitions.\nvn.train(documentation=\"Our business defines OTIF score as the percentage of orders that are delivered on time and in full\")\n\n# You can also add SQL queries to your training data. This is useful if you have some queries already lying around. 
You can just copy and paste those from your editor to begin generating new SQL.\nvn.train(sql=\"SELECT * FROM my-table WHERE name = 'John Doe'\")\n"}, {"id": "59fcb3b1-4434-583d-82be-ed8e9b04d699", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# At any time you can inspect what training data the package is able to reference\ntraining_data = vn.get_training_data()\ntraining_data"}, {"id": "0dd237e6-ab36-5dd4-9234-e2d25168d50f", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "# You can remove training data if there's obsolete/incorrect information. \nvn.remove_training_data(id='1-ddl')"}, {"id": "bf2fc121-a3ab-5a2e-95b0-383271e82d5f", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Asking the AI\nWhenever you ask a new question, it will find the 10 most relevant pieces of training data and use it as part of the LLM prompt to generate the SQL."}, {"id": "edb6679e-a102-5efc-b890-81babca8f500", "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": "vn.ask(question=...)"}, {"id": "644da41f-f753-5456-81d9-329a3ead11fe", "cell_type": "markdown", "execution_count": null, "metadata": {}, "outputs": [], "source": "## Next Steps\nUsing Vanna via Jupyter notebooks is great for getting started but check out additional interfaces like the \n- [Streamlit app](https://github.com/vanna-ai/vanna-streamlit)\n- [Flask app](https://github.com/vanna-ai/vanna-flask)\n- [Slackbot](https://github.com/vanna-ai/vanna-slack)\n- [Vanna Hosted Web App](https://app.vanna.ai)"}], "metadata": {"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5"}}, "nbformat": 4, "nbformat_minor": 5}
\ No newline at end of file