Skip to content

Commit

Permalink
Switch to new SQLAlchemy dialect for CrateDB
Browse the repository at this point in the history
This includes the `FloatVector` SQLAlchemy type.
  • Loading branch information
amotl committed Dec 21, 2023
1 parent 582b512 commit 0ec842a
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 157 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## In progress
- Add support for container types `ARRAY`, `OBJECT`, and `FLOAT_VECTOR`.
- Improve write operations to be closer to `target-postgres`.
- Switch to new SQLAlchemy dialect for CrateDB.

## 2023-12-08 v0.0.1
- Make it work. It can run the canonical Meltano GitHub -> DB example.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ dynamic = [
"version",
]
dependencies = [
"crate[sqlalchemy]",
"cratedb-toolkit",
'importlib-resources; python_version < "3.9"', # "meltanolabs-target-postgres==0.0.9",
"meltanolabs-target-postgres@ git+https://github.com/singer-contrib/meltanolabs-target-postgres.git@pgvector",
"sqlalchemy-cratedb[vector]@ git+https://github.com/crate-workbench/sqlalchemy-cratedb@amo/type-float-vector",
]
[project.optional-dependencies]
all = [
Expand Down
8 changes: 6 additions & 2 deletions target_cratedb/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
from datetime import datetime

import sqlalchemy as sa
from crate.client.sqlalchemy.types import ObjectType, ObjectTypeImpl, _ObjectArray
from singer_sdk import typing as th
from singer_sdk.helpers._typing import is_array_type, is_boolean_type, is_integer_type, is_number_type, is_object_type
from sqlalchemy_cratedb.type import FloatVector, ObjectType
from sqlalchemy_cratedb.type.array import _ObjectArray
from sqlalchemy_cratedb.type.object import ObjectTypeImpl
from target_postgres.connector import NOTYPE, PostgresConnector

from target_cratedb.sqlalchemy.patch import polyfill_refresh_after_dml_engine
from target_cratedb.sqlalchemy.vector import FloatVector


class CrateDBConnector(PostgresConnector):
Expand Down Expand Up @@ -225,6 +226,9 @@ def _get_type_sort_key(
if isinstance(sql_type, NOTYPE):
return 0, _len

if not hasattr(sql_type, "python_type"):
raise TypeError(f"Resolving type for sort key failed: {sql_type}")

Codecov (codecov/patch) warning on line 230 of target_cratedb/connector.py (target_cratedb/connector.py#L230): added line #L230 was not covered by tests.

_pytype = t.cast(type, sql_type.python_type)
if issubclass(_pytype, (str, bytes)):
return 900, _len
Expand Down
24 changes: 12 additions & 12 deletions target_cratedb/sqlalchemy/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import sqlalchemy as sa
from _decimal import Decimal
from crate.client.http import CrateJsonEncoder
from crate.client.sqlalchemy.dialect import ARRAY, TYPES_MAP, DateTime
from crate.client.sqlalchemy.types import _ObjectArray
from sqlalchemy.sql import sqltypes
from sqlalchemy_cratedb.dialect import TYPES_MAP, DateTime
from sqlalchemy_cratedb.type.array import _ObjectArray


def patch_sqlalchemy():
Expand All @@ -19,20 +18,21 @@ def patch_types():
TODO: Upstream to crate-python.
"""
TYPES_MAP["bigint"] = sqltypes.BIGINT
TYPES_MAP["bigint_array"] = ARRAY(sqltypes.BIGINT)
TYPES_MAP["long"] = sqltypes.BIGINT
TYPES_MAP["long_array"] = ARRAY(sqltypes.BIGINT)
TYPES_MAP["real"] = sqltypes.DOUBLE
TYPES_MAP["real_array"] = ARRAY(sqltypes.DOUBLE)
TYPES_MAP["timestamp without time zone"] = sqltypes.TIMESTAMP
TYPES_MAP["timestamp with time zone"] = sqltypes.TIMESTAMP
# abc()
TYPES_MAP["bigint"] = sa.BIGINT
TYPES_MAP["bigint_array"] = sa.ARRAY(sa.BIGINT)
TYPES_MAP["long"] = sa.BIGINT
TYPES_MAP["long_array"] = sa.ARRAY(sa.BIGINT)
TYPES_MAP["real"] = sa.DOUBLE
TYPES_MAP["real_array"] = sa.ARRAY(sa.DOUBLE)
TYPES_MAP["timestamp without time zone"] = sa.TIMESTAMP
TYPES_MAP["timestamp with time zone"] = sa.TIMESTAMP

# TODO: Can `ARRAY` be inherited from PostgreSQL's
# `ARRAY`, to make type checking work?

def as_generic(self):
return sqltypes.ARRAY
return sa.ARRAY

Codecov (codecov/patch) warning on line 35 of target_cratedb/sqlalchemy/patch.py (target_cratedb/sqlalchemy/patch.py#L35): added line #L35 was not covered by tests.

_ObjectArray.as_generic = as_generic

Expand Down
140 changes: 0 additions & 140 deletions target_cratedb/sqlalchemy/vector.py

This file was deleted.

4 changes: 2 additions & 2 deletions target_cratedb/tests/test_standard_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import jsonschema
import pytest
import sqlalchemy as sa
from crate.client.sqlalchemy.types import ObjectTypeImpl
from singer_sdk.exceptions import MissingKeyPropertiesError
from singer_sdk.testing import sync_end_to_end
from sqlalchemy_cratedb.type import FloatVector
from sqlalchemy_cratedb.type.object import ObjectTypeImpl
from target_postgres.tests.samples.aapl.aapl import Fundamentals
from target_postgres.tests.samples.sample_tap_countries.countries_tap import (
SampleTapCountries,
Expand All @@ -20,7 +21,6 @@
from target_cratedb.connector import CrateDBConnector
from target_cratedb.sinks import MELTANO_CRATEDB_STRATEGY_DIRECT
from target_cratedb.sqlalchemy.patch import polyfill_refresh_after_dml_engine
from target_cratedb.sqlalchemy.vector import FloatVector
from target_cratedb.target import TargetCrateDB

try:
Expand Down

0 comments on commit 0ec842a

Please sign in to comment.