Sinaptik-AI · gventuri · Sep 21, 2023 · Sep 20, 2023 · Sep 20, 2023 · Sep 20, 2023
diff --git a/examples/from_databricks.py b/examples/from_databricks.py
@@ -0,0 +1,27 @@
+"""Example of using PandasAI with Databricks"""
+
+from pandasai import SmartDataframe
+from pandasai.llm import OpenAI
+from pandasai.connectors import DatabricksConnector
+
+databricks_connector = DatabricksConnector(
+    config={
+        "host": "adb-*****.azuredatabricks.net",
+        "database": "default",
+        "token": "dapidfd412321",
+        "port": 443,
+        "table": "loan_payments_data",
+        "httpPath": "/sql/1.0/warehouses/213421312",
+        "where": [
+            # Optional: filter conditions that reduce the size of the
+            # dataframe; each entry is [column, operator, value].
+            ["loan_status", "=", "PAIDOFF"],
+        ],
+    }
+)
+
+llm = OpenAI("OPEN_API_KEY")  # placeholder value; use your own OpenAI API key
+df = SmartDataframe(databricks_connector, config={"llm": llm})
+
+response = df.chat("How many people from the United states?")
+print(response)
diff --git a/pandasai/connectors/__init__.py b/pandasai/connectors/__init__.py
@@ -7,6 +7,7 @@
 from .base import BaseConnector
 from .sql import SQLConnector, MySQLConnector, PostgreSQLConnector
 from .snowflake import SnowFlakeConnector
+from .databricks import DatabricksConnector
 from .yahoo_finance import YahooFinanceConnector
 
 __all__ = [
@@ -16,4 +17,5 @@
     "PostgreSQLConnector",
     "YahooFinanceConnector",
     "SnowFlakeConnector",
+    "DatabricksConnector",
 ]
diff --git a/pandasai/connectors/base.py b/pandasai/connectors/base.py
@@ -63,6 +63,17 @@ class SnowFlakeConnectorConfig(SQLBaseConnectorConfig):
     warehouse: str
 
 
+class DatabricksConnectorConfig(SQLBaseConnectorConfig):
+    """
+    Connector configuration for Databricks.
+    """
+
+    host: str  # hostname placed in the connection URL
+    port: int  # port placed in the connection URL
+    token: str  # access token, sent as the password of the "token" user
+    httpPath: str  # value passed as the http_path query parameter of the URL
+
+
 class BaseConnector(ABC):
     """
     Base connector class to be extended by all connectors.

diff --git a/pandasai/connectors/databricks.py b/pandasai/connectors/databricks.py
@@ -0,0 +1,69 @@
+"""
+Databricks Connector that connects you to your Databricks SQL Warehouse on
+Azure, AWS and GCP
+"""
+
+import os
+from .base import BaseConnectorConfig, DatabricksConnectorConfig
+from sqlalchemy import create_engine
+from typing import Union
+from .sql import SQLConnector
+
+
+class DatabricksConnector(SQLConnector):
+    """Connector for a Databricks SQL Warehouse, built on SQLConnector."""
+
+    # Maps each config key to the environment variable used as its fallback.
+    _ENV_FALLBACKS = {
+        "token": "DATABRICKS_TOKEN",
+        "database": "DATABRICKS_DATABASE",
+        "host": "DATABRICKS_HOST",
+        "port": "DATABRICKS_PORT",
+        "httpPath": "DATABRICKS_HTTP_PATH",
+    }
+
+    def __init__(self, config: DatabricksConnectorConfig):
+        """
+        Initialize the Databricks connector with the given configuration.
+
+        Args:
+            config (dict): Connector settings; updated in place. Keys missing
+                from it are filled from the DATABRICKS_* environment variables.
+        """
+        config["dialect"] = "databricks"
+
+        for key, env_name in self._ENV_FALLBACKS.items():
+            env_value = os.getenv(env_name)
+            if key not in config and env_value:
+                config[key] = env_value
+
+        super().__init__(config)
+
+    def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]):
+        """Build a DatabricksConnectorConfig from the given settings."""
+        return DatabricksConnectorConfig(**config)
+
+    def _init_connection(self, config: DatabricksConnectorConfig):
+        """
+        Create the SQLAlchemy engine and open a connection to Databricks.
+
+        Args:
+            config (DatabricksConnectorConfig): Settings used to build the URL.
+        """
+        self._engine = create_engine(
+            f"{config.dialect}://token:{config.token}@{config.host}:{config.port}?http_path={config.httpPath}"
+        )
+
+        self._connection = self._engine.connect()
+
+    def __repr__(self):
+        """
+        Return the string representation of the Databricks connector.
+
+        The access token is deliberately omitted so the secret is never logged.
+        """
+        return (
+            f"<{self.__class__.__name__} dialect={self._config.dialect} "
+            f"host={self._config.host} port={self._config.port} "
+            f"database={self._config.database} httpPath={str(self._config.httpPath)}>"
+        )
diff --git a/pandasai/connectors/sql.py b/pandasai/connectors/sql.py
@@ -279,10 +279,7 @@ def rows_count(self):
             )
 
         # Run a SQL query to get the number of rows
-        query = sql.text(
-            "SELECT COUNT(*) FROM information_schema.columns "
-            "WHERE table_name = :table_name"
-        ).bindparams(table_name=self._config.table)
+        query = sql.text(f"SELECT COUNT(*) FROM {self._config.table}")
 
         # Return the number of rows
         self._rows_count = self._connection.execute(query).fetchone()[0]
@@ -307,14 +304,7 @@ def columns_count(self):
                 f"{self._config.dialect}"
             )
 
-        # Run a SQL query to get the number of columns
-        query = sql.text(
-            "SELECT COUNT(*) FROM information_schema.columns "
-            f"WHERE table_name = '{self._config.table}'"
-        )
-
-        # Return the number of columns
-        self._columns_count = self._connection.execute(query).fetchone()[0]
+        self._columns_count = len(self.head().columns)
         return self._columns_count
 
     def _get_column_hash(self, include_additional_filters: bool = False):