diff --git a/.gitignore b/.gitignore index c79d55d..f1eb116 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .idea .venv* *.egg-info +build coverage.xml diff --git a/docs/backlog.md b/docs/backlog.md new file mode 100644 index 0000000..9e6c5bd --- /dev/null +++ b/docs/backlog.md @@ -0,0 +1,32 @@ +# Backlog + +## Iteration +1 +- Unlock configuring a dedicated database schema, not just `melty`. + Why doesn't `"crate://crate@localhost/?schema=foo"` work? This still needs to be confirmed. +- Submit a few patches to `meltanolabs-target-postgres`, about proper + quoting of schema and table names. +- Submit a few other patches to crate-python, in order to clean up here. +- Release v0.0.1 +- Submit registration to Meltano Hub + +## Iteration +2 +- Venerable schema name propagation flaw hits again, but differently? + ``` + TypeError: PostgresConnector.get_table_columns() got an unexpected keyword argument 'full_table_name' + ``` + +## Obstacles +Upstream some workarounds to crate-python. +- `TypeError: Invalid argument(s) 'json_serializer','json_deserializer' sent to create_engine(), using configuration CrateDialect/QueuePool/Engine. Please check that the keyword arguments are appropriate for this combination of components.` +- `UnsupportedFeatureException[Cannot use columns of type "object" as primary key]` +- `NotImplementedError: Default TypeEngine.as_generic() heuristic method was unsuccessful for crate.client.sqlalchemy.types._ObjectArray. A custom as_generic() method must be implemented for this type class.` +- `sqlalchemy.exc.DBAPIError: (crate.client.exceptions.TimezoneUnawareException) Timezone aware datetime objects are not supported` +- `NotImplementedError: This backend does not support multiple-table criteria within UPDATE` +- `ColumnValidationException[Validation failed for code: Updating a primary key is not supported]` + +## Notes +- Missing `CREATE SCHEMA` is tedious, and currently needs a workaround. + + +## Done +- Document use with CrateDB Cloud. 
diff --git a/pyproject.toml b/pyproject.toml index 0c26e0b..d5e7670 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,21 +11,22 @@ default-tag = "0.0.0" [project] name = "meltano-target-cratedb" -description = "A Singer target for CrateDB, built with the Meltano SDK for Singer Targets, based on the PostgreSQL target" +description = "A Singer target for CrateDB, built with the Meltano SDK, and based on the Meltano PostgreSQL target." readme = "README.md" keywords = [ "cratedb", "data-loading", "data-processing", + "data-toolkit", "data-transfer", + "data-transformation", "ELT", "ETL", "io", "Meltano", "Meltano SDK", - "Postgres", + "PostgreSQL", "Singer", - "toolkit", ] license = { text = "MIT" } authors = [ diff --git a/target_cratedb/connector.py b/target_cratedb/connector.py index c7445fe..63a0745 100644 --- a/target_cratedb/connector.py +++ b/target_cratedb/connector.py @@ -35,6 +35,11 @@ class CrateDBConnector(PostgresConnector): allow_temp_tables: bool = False # Whether temp tables are supported. def create_engine(self) -> sqlalchemy.Engine: + """ + Create an SQLAlchemy engine object. + + Note: Needs to be patched to establish a polyfill which will synchronize write operations. + """ engine = super().create_engine() polyfill_refresh_after_dml_engine(engine) return engine @@ -43,6 +48,8 @@ def create_engine(self) -> sqlalchemy.Engine: def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine: """Return a JSON Schema representation of the provided type. + Note: Needs to be patched to invoke other static methods on `CrateDBConnector`. + By default will call `typing.to_sql_type()`. Developers may override this method to accept additional input argument types, @@ -89,6 +96,8 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine: def pick_individual_type(jsonschema_type: dict): """Select the correct sql type assuming jsonschema_type has only a single type. + Note: Needs to be patched to supply handlers for `object` and `array`. 
+ Args: jsonschema_type: A jsonschema_type array containing only a single type. @@ -115,6 +124,8 @@ def pick_individual_type(jsonschema_type: dict): def pick_best_sql_type(sql_type_array: list): """Select the best SQL type from an array of instances of SQL type classes. + Note: Needs to be patched to supply handler for `ObjectTypeImpl`. + Args: sql_type_array: The array of instances of SQL type classes. @@ -152,6 +163,8 @@ def _sort_types( ) -> list[sqlalchemy.types.TypeEngine]: """Return the input types sorted from most to least compatible. + Note: Needs to be patched to supply handlers for `_ObjectArray` and `NOTYPE`. + For example, [Smallint, Integer, Datetime, String, Double] would become [Unicode, String, Double, Integer, Smallint, Datetime]. @@ -201,6 +214,8 @@ def copy_table_structure( ) -> sqlalchemy.Table: """Copy table structure. + Note: Needs to be patched to prevent `Primary key columns cannot be nullable` errors. + Args: full_table_name: the target table name potentially including schema from_table: the source table @@ -224,3 +239,9 @@ def copy_table_structure( new_table = sqlalchemy.Table(table_name, meta, *columns) new_table.create(bind=connection) return new_table + + def prepare_schema(self, schema_name: str) -> None: + """ + Don't emit `CREATE SCHEMA` statements to CrateDB. + """ + pass