diff --git a/target_cratedb/connector.py b/target_cratedb/connector.py index a9d60f3..0b7c5c6 100644 --- a/target_cratedb/connector.py +++ b/target_cratedb/connector.py @@ -167,6 +167,9 @@ def pick_best_sql_type(sql_type_array: list): An instance of the best SQL type class based on defined precedence order. """ precedence_order = [ + sa.ARRAY, + FloatVector, + ObjectTypeImpl, sa.TEXT, sa.TIMESTAMP, sa.DATETIME, @@ -178,9 +181,6 @@ def pick_best_sql_type(sql_type_array: list): sa.INTEGER, sa.BOOLEAN, NOTYPE, - sa.ARRAY, - FloatVector, - ObjectTypeImpl, ] for sql_type in precedence_order: diff --git a/target_cratedb/tests/data_files/commits.singer b/target_cratedb/tests/data_files/commits.singer new file mode 100644 index 0000000..be8ffb6 --- /dev/null +++ b/target_cratedb/tests/data_files/commits.singer @@ -0,0 +1,31 @@ +{ "type": "SCHEMA", "stream": "commits", "schema": { "properties": { "id": { "type": "string" }, "authored_date": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "committed_date": { "anyOf": [ { "type": "string", "format": "date-time" }, { "type": "null" } ] }, "parent_ids": { "anyOf": [ { "type": "array", "items": { "type": [ "null", "string" ] } }, { "type": "string" }, { "type": "null" } ] }, "commit_message": { "anyOf": [ { "type": "string" }, { "type": "null" } ] }, "legacy_id": { "anyOf": [ { "type": "string" }, { "type": "integer" }, { "type": "null" } ] } }, "type": "object" }, "key_properties": [ "id" ] } +{ "type": "RECORD", "stream": "commits", "record": {"id":"MX-SON","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["PA-KY"],"commit_message":"Public-key","legacy_id":24}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"LA-PH","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["RU-VLG"],"commit_message":"exuding"}} +{ "type": "RECORD", "stream": "commits", "record": 
{"id":"MY-12","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-WI","PG-WPD","PF-U-A"],"legacy_id":93}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"MY-15","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["PG-MPL","DO-20"],"commit_message":"Switchable"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"MN-043","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["AU-WA"],"commit_message":"paradigm","legacy_id":"BR-CE"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"MY-16","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-MS","ET-GA"],"commit_message":"value-added","legacy_id":62}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"RU-SAK","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["BR-CE","RU-SAK"],"commit_message":"content-based","legacy_id":48}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"RU-MO","authored_date":"2015-09-01T16:34:02","parent_ids":["MX-ROO"],"commit_message":"Grass-roots","legacy_id":7}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"CO-CAQ","authored_date":"2015-09-01T16:34:02","parent_ids":["CA-QC","BR-ES"],"commit_message":"Pre-emptive","legacy_id":54}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"US-AK","authored_date":"2015-09-01T16:34:02","commit_message":"Digitized","legacy_id":74}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"BR-PR","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["CM-LT","BF-SEN"],"commit_message":"Sharable","legacy_id":"CO-CUN"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"CA-BC","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["BR-MG","ID-SU"],"commit_message":"User-centric","legacy_id":43}} +{ "type": "RECORD", 
"stream": "commits", "record": {"id":"PG-MBA","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-PA","AU-QLD","CD-BC"],"commit_message":"archive","legacy_id":52}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"TZ-25","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["AU-QLD","CM-NO","US-LA"],"commit_message":"knowledge user","legacy_id":21}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"MY-13","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-CA","AU-VIC"],"commit_message":"Automated","legacy_id":66}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"US-UT","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["CO-CUN"],"commit_message":"access"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"TR-50","authored_date":"2015-09-01T16:34:02","parent_ids":["US-AZ","MH-KWA"],"commit_message":"service-desk","legacy_id":94}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"JP-47","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["GE-TB"],"commit_message":"Adaptive","legacy_id":28}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"PG-MPL","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-NE","AR-Z","US-AK"],"commit_message":"Reduced","legacy_id":28}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"PT-20","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["ET-OR"],"commit_message":"Diverse","legacy_id":29}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"PG-SAN","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["IR-02","US-TX","BR-PR"],"commit_message":"Secured","legacy_id":7}} +{ "type": "RECORD", "stream": "commits", "record": 
{"id":"US-MI","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["US-WA"],"commit_message":"4th generation","legacy_id":26}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"US-NM","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["ID-PA"],"commit_message":"client-driven"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"NO-20","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["ID-YO"],"commit_message":"parallelism"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"AR-R","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["CN-13"],"commit_message":"hierarchy","legacy_id":"AU-QLD"}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"US-IN","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["BR-SP"],"commit_message":"concept","legacy_id":31}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"FR-J","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["UA-05"],"commit_message":"solution","legacy_id":47}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"PA-8","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["CD-MA","AU-NT","US-CA"],"commit_message":"Down-sized","legacy_id":25}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"US-IL","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["VE-J"],"commit_message":"Adaptive","legacy_id":85}} +{ "type": "RECORD", "stream": "commits", "record": {"id":"MY-14","authored_date":"2015-09-01T16:34:02","committed_date":"2015-09-01T16:34:02","parent_ids":["DE-BR","MN-067"],"commit_message":"contingency","legacy_id":73}} diff --git a/target_cratedb/tests/test_standard_target.py b/target_cratedb/tests/test_standard_target.py index ca55faf..36d1857 100644 
--- a/target_cratedb/tests/test_standard_target.py +++ b/target_cratedb/tests/test_standard_target.py @@ -131,7 +131,12 @@ def singer_file_to_target(file_name, target) -> None: file_name: name to file in .tests/data_files to be sent into target Target: Target to pass data from file_path into.. """ - file_path = resource_files("target_postgres.tests") / "data_files" / file_name + file_path_local = resource_files("target_cratedb.tests") / "data_files" / file_name + file_path_upstream = resource_files("target_postgres.tests") / "data_files" / file_name + if file_path_local.exists(): + file_path = file_path_local + else: + file_path = file_path_upstream buf = io.StringIO() with redirect_stdout(buf): with open(file_path) as f: @@ -555,8 +560,6 @@ def test_large_int(cratedb_target): singer_file_to_target(file_name, cratedb_target) -# SQLParseException[Cannot cast value `[PA-KY]` to type `text`] -@pytest.mark.skip("`parent_ids STRING` is rendered, but it needs to be `parent_ids ARRAY(STRING())`") def test_anyof(cratedb_target): """Test that anyOf is handled correctly""" engine = create_engine(cratedb_target) @@ -571,7 +574,10 @@ def test_anyof(cratedb_target): for column in table.c: # {"type":"string"} if column.name == "id": - assert isinstance(column.type, sa.TEXT) + # TODO: CrateDB needs `(sa.TEXT, sa.String)` here. + # The original (upstream) test is fine with `sa.TEXT` alone, so + # please review the CrateDB dialect. Discovered through `test_anyof`. + assert isinstance(column.type, (sa.TEXT, sa.String)) # Any of nullable date-time. # Note that postgres timestamp is equivalent to jsonschema date-time. @@ -586,13 +592,15 @@ def test_anyof(cratedb_target): # Any of nullable string. # {"anyOf":[{"type":"string"},{"type":"null"}]} + # TODO: See above. if column.name == "commit_message": - assert isinstance(column.type, sa.TEXT) + assert isinstance(column.type, (sa.TEXT, sa.String)) # Any of nullable string or integer.
# {"anyOf":[{"type":"string"},{"type":"integer"},{"type":"null"}]} + # TODO: See above. if column.name == "legacy_id": - assert isinstance(column.type, sa.TEXT) + assert isinstance(column.type, (sa.TEXT, sa.String)) def test_new_array_column(cratedb_target):