Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate API of ibis #624

Merged
merged 2 commits into from
Jun 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ibis-server/app/dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from app.model import QueryDTO
from app.model.data_source import DataSource


# Rebuild model to validate the dto is correct via validation of the pydantic
def verify_query_dto(data_source: DataSource, dto: QueryDTO):
data_source.get_dto_type()(**dto.model_dump(by_alias=True))
91 changes: 91 additions & 0 deletions ibis-server/app/model/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from __future__ import annotations

from typing import Union

from pydantic import BaseModel, Field

manifest_str_field = Field(alias="manifestStr", description="Base64 manifest")
connection_info_field = Field(alias="connectionInfo")


class QueryDTO(BaseModel):
sql: str
manifest_str: str = manifest_str_field
column_dtypes: dict[str, str] | None = Field(
alias="columnDtypes",
description="If this field is set, it will forcibly convert the type.",
default=None,
)
connection_info: ConnectionInfo = connection_info_field


class QueryBigQueryDTO(QueryDTO):
connection_info: BigQueryConnectionInfo = connection_info_field


class QueryMySqlDTO(QueryDTO):
connection_info: ConnectionUrl | MySqlConnectionInfo = connection_info_field


class QueryPostgresDTO(QueryDTO):
connection_info: ConnectionUrl | PostgresConnectionInfo = connection_info_field


class QuerySnowflakeDTO(QueryDTO):
connection_info: SnowflakeConnectionInfo = connection_info_field


class BigQueryConnectionInfo(BaseModel):
project_id: str
dataset_id: str
credentials: str = Field(description="Base64 encode `credentials.json`")


class MySqlConnectionInfo(BaseModel):
host: str
port: int
database: str
user: str
password: str


class ConnectionUrl(BaseModel):
connection_url: str = Field(alias="connectionUrl")


class PostgresConnectionInfo(BaseModel):
host: str = Field(examples=["localhost"])
port: int = Field(examples=[5432])
database: str
user: str
password: str


class SnowflakeConnectionInfo(BaseModel):
user: str
password: str
account: str
database: str
sf_schema: str = Field(
alias="schema"
) # Use `sf_schema` to avoid `schema` shadowing in BaseModel


ConnectionInfo = Union[
BigQueryConnectionInfo,
ConnectionUrl,
MySqlConnectionInfo,
PostgresConnectionInfo,
SnowflakeConnectionInfo,
]


class ValidateDTO(BaseModel):
manifest_str: str = manifest_str_field
parameters: dict[str, str]
connection_info: ConnectionInfo = connection_info_field


class AnalyzeSQLDTO(BaseModel):
manifest_str: str = manifest_str_field
sql: str
43 changes: 2 additions & 41 deletions ibis-server/app/model/connector.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
from json import loads

import pandas as pd
from pydantic import BaseModel, Field

from app.mdl.rewriter import rewrite
from app.model.data_source import (
BigQueryConnectionInfo,
ConnectionInfo,
DataSource,
MySqlConnectionUrl,
MySqlConnectionInfo,
PostgresConnectionUrl,
PostgresConnectionInfo,
SnowflakeConnectionInfo,
)
from app.model import ConnectionInfo
from app.model.data_source import DataSource


class Connector:
Expand Down Expand Up @@ -67,35 +58,5 @@ def _to_datetime_and_format(series: pd.Series) -> pd.Series:
)


class QueryDTO(BaseModel):
sql: str
manifest_str: str = Field(alias="manifestStr", description="Base64 manifest")
column_dtypes: dict[str, str] | None = Field(
alias="columnDtypes",
description="If this field is set, it will forcibly convert the type.",
default=None,
)


class QueryBigQueryDTO(QueryDTO):
connection_info: BigQueryConnectionInfo = Field(alias="connectionInfo")


class QueryMySqlDTO(QueryDTO):
connection_info: MySqlConnectionUrl | MySqlConnectionInfo = Field(
alias="connectionInfo"
)


class QueryPostgresDTO(QueryDTO):
connection_info: PostgresConnectionUrl | PostgresConnectionInfo = Field(
alias="connectionInfo"
)


class QuerySnowflakeDTO(QueryDTO):
connection_info: SnowflakeConnectionInfo = Field(alias="connectionInfo")


class QueryDryRunError(Exception):
pass
111 changes: 44 additions & 67 deletions ibis-server/app/model/data_source.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
from __future__ import annotations

import base64
from enum import StrEnum, auto
from enum import Enum, StrEnum, auto
from json import loads
from typing import Union

import ibis
from google.oauth2 import service_account
from ibis import BaseBackend
from pydantic import BaseModel, Field

from app.model import (
BigQueryConnectionInfo,
ConnectionInfo,
ConnectionUrl,
MySqlConnectionInfo,
PostgresConnectionInfo,
QueryBigQueryDTO,
QueryDTO,
QueryMySqlDTO,
QueryPostgresDTO,
QuerySnowflakeDTO,
SnowflakeConnectionInfo,
)


class DataSource(StrEnum):
Expand All @@ -17,18 +29,33 @@ class DataSource(StrEnum):
postgres = auto()
snowflake = auto()

def get_connection(self, dto) -> BaseBackend:
match self:
case DataSource.bigquery:
return self.get_bigquery_connection(dto)
case DataSource.mysql:
return self.get_mysql_connection(dto)
case DataSource.postgres:
return self.get_postgres_connection(dto)
case DataSource.snowflake:
return self.get_snowflake_connection(dto)
case _:
raise NotImplementedError(f"Unsupported data source: {self}")
def get_connection(self, info: ConnectionInfo) -> BaseBackend:
try:
return DataSourceExtension[self].get_connection(info)
except KeyError:
raise NotImplementedError(f"Unsupported data source: {self}")

def get_dto_type(self):
try:
return DataSourceExtension[self].dto
except KeyError:
raise NotImplementedError(f"Unsupported data source: {self}")


class DataSourceExtension(Enum):
bigquery = QueryBigQueryDTO
mysql = QueryMySqlDTO
postgres = QueryPostgresDTO
snowflake = QuerySnowflakeDTO

def __init__(self, dto: QueryDTO):
self.dto = dto

def get_connection(self, info: ConnectionInfo) -> BaseBackend:
try:
return getattr(self, f"get_{self.name}_connection")(info)
except KeyError:
raise NotImplementedError(f"Unsupported data source: {self}")

@staticmethod
def get_bigquery_connection(info: BigQueryConnectionInfo) -> BaseBackend:
Expand All @@ -44,7 +71,7 @@ def get_bigquery_connection(info: BigQueryConnectionInfo) -> BaseBackend:

@staticmethod
def get_mysql_connection(
info: MySqlConnectionUrl | MySqlConnectionInfo,
info: ConnectionUrl | MySqlConnectionInfo,
) -> BaseBackend:
return ibis.connect(
getattr(info, "connection_url", None)
Expand All @@ -54,7 +81,7 @@ def get_mysql_connection(

@staticmethod
def get_postgres_connection(
info: PostgresConnectionUrl | PostgresConnectionInfo,
info: ConnectionUrl | PostgresConnectionInfo,
) -> BaseBackend:
return ibis.connect(
getattr(info, "connection_url", None)
Expand All @@ -70,53 +97,3 @@ def get_snowflake_connection(info: SnowflakeConnectionInfo) -> BaseBackend:
database=info.database,
schema=info.sf_schema,
)


class BigQueryConnectionInfo(BaseModel):
project_id: str
dataset_id: str
credentials: str = Field(description="Base64 encode `credentials.json`")


class MySqlConnectionInfo(BaseModel):
host: str
port: int
database: str
user: str
password: str


class MySqlConnectionUrl(BaseModel):
connection_url: str = Field(alias="connectionUrl")


class PostgresConnectionInfo(BaseModel):
host: str = Field(examples=["localhost"])
port: int = Field(examples=[5432])
database: str
user: str
password: str


class PostgresConnectionUrl(BaseModel):
connection_url: str = Field(alias="connectionUrl")


class SnowflakeConnectionInfo(BaseModel):
user: str
password: str
account: str
database: str
sf_schema: str = Field(
alias="schema"
) # Use `sf_schema` to avoid `schema` shadowing in BaseModel


ConnectionInfo = Union[
BigQueryConnectionInfo,
MySqlConnectionInfo,
MySqlConnectionUrl,
PostgresConnectionInfo,
PostgresConnectionUrl,
SnowflakeConnectionInfo,
]
8 changes: 4 additions & 4 deletions ibis-server/app/model/metadata/bigquery.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from json import loads

from app.model.data_source import BigQueryConnectionInfo
from app.model import BigQueryConnectionInfo
from app.model.data_source import DataSource
from app.model.metadata.dto import (
Table,
Constraint,
TableProperties,
Column,
Constraint,
ConstraintType,
Table,
TableProperties,
)
from app.model.metadata.metadata import Metadata

Expand Down
4 changes: 2 additions & 2 deletions ibis-server/app/model/metadata/dto.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from enum import Enum
from typing import List, Optional, Dict, Any
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field

from app.model.data_source import ConnectionInfo
from app.model import ConnectionInfo


class MetadataDTO(BaseModel):
Expand Down
11 changes: 6 additions & 5 deletions ibis-server/app/model/metadata/factory.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from json import loads

from app.model.data_source import DataSource, ConnectionInfo
from app.model import ConnectionInfo
from app.model.data_source import DataSource
from app.model.metadata.bigquery import BigQueryMetadata
from app.model.metadata.metadata import Metadata
from app.model.metadata.postgres import PostgresMetadata
from app.model.metadata.mysql import MySQLMetadata
from app.model.metadata.dto import (
Table,
Constraint,
Table,
)
from app.model.metadata.metadata import Metadata
from app.model.metadata.mysql import MySQLMetadata
from app.model.metadata.postgres import PostgresMetadata


class MetadataFactory:
Expand Down
4 changes: 2 additions & 2 deletions ibis-server/app/model/metadata/metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from app.model.connector import ConnectionInfo
from app.model.metadata.dto import Table, Constraint
from app.model import ConnectionInfo
from app.model.metadata.dto import Constraint, Table


class Metadata:
Expand Down
9 changes: 5 additions & 4 deletions ibis-server/app/model/metadata/mysql.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from json import loads

from app.model.data_source import DataSource, MySqlConnectionInfo
from app.model import MySqlConnectionInfo
from app.model.data_source import DataSource
from app.model.metadata.dto import (
Table,
Constraint,
TableProperties,
Column,
Constraint,
ConstraintType,
Table,
TableProperties,
WrenEngineColumnType,
)
from app.model.metadata.metadata import Metadata
Expand Down
Loading
Loading