From 027130def75b335c22a9c741e370d710ae4cf974 Mon Sep 17 00:00:00 2001 From: eryanRM Date: Tue, 7 Nov 2023 15:51:31 -0800 Subject: [PATCH] synapse escape chars and test utils update (reset w/ devel) --- dlt/common/data_writers/escape.py | 36 +++++++++++++++++++++++++++++++ tests/load/utils.py | 2 ++ tests/utils.py | 6 ++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/dlt/common/data_writers/escape.py b/dlt/common/data_writers/escape.py index 0656a69634..218703cbb5 100644 --- a/dlt/common/data_writers/escape.py +++ b/dlt/common/data_writers/escape.py @@ -97,6 +97,42 @@ def escape_mssql_literal(v: Any) -> Any: return str(v) +# TODO needs improvement for SQL injection, combine with mssql handling +def escape_synapse_literal(v: Any) -> Any: + if isinstance(v, str): + # Use the _escape_extended function to escape the string + return _escape_extended(v, prefix="N'", escape_dict=SYNAPSE_ESCAPE_DICT) + if isinstance(v, (datetime, date, time)): + return f"'{v.isoformat()}'" + if isinstance(v, (list, dict)): + # Serialize the list or dict to JSON and then escape it + return _escape_extended(json.dumps(v), prefix="N'", escape_dict=SYNAPSE_ESCAPE_DICT) + if isinstance(v, bytes): + hex_string = v.hex() + return f"0x{hex_string}" + if isinstance(v, bool): + return str(int(v)) + if v is None: + return "NULL" + return str(v) + + +# TODO potentially combine with mssql +SYNAPSE_ESCAPE_DICT = { + "'": "''", + '\n': "' + CHAR(10) + N'", + '\r': "' + CHAR(13) + N'", + '\t': "' + CHAR(9) + N'", + "\\": "\\", +} + +SYNAPSE_SQL_ESCAPE_RE = _make_sql_escape_re(SYNAPSE_ESCAPE_DICT) + + +def escape_synapse_identifier(v: str) -> str: + return '"' + v.replace('"', '') + '"' + + def escape_redshift_identifier(v: str) -> str: return '"' + v.replace('"', '""').replace("\\", "\\\\") + '"' diff --git a/tests/load/utils.py b/tests/load/utils.py index be2097c879..ed626c54eb 100644 --- a/tests/load/utils.py +++ b/tests/load/utils.py @@ -147,6 +147,7 @@ def destinations_configs( DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, stage_name="PUBLIC.dlt_s3_stage", extra_info="s3-integration"), DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, stage_name="PUBLIC.dlt_az_stage", extra_info="az-integration"), DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, extra_info="az-authorization"), + DestinationTestConfiguration(destination="synapse", staging="filesystem", file_format="parquet", bucket_url=AZ_BUCKET, stage_name="PUBLIC.dlt_az_stage") ] if all_staging_configs: @@ -155,6 +156,7 @@ def destinations_configs( DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"), DestinationTestConfiguration(destination="redshift", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"), DestinationTestConfiguration(destination="bigquery", staging="filesystem", file_format="jsonl", bucket_url=GCS_BUCKET, extra_info="gcs-authorization"), + DestinationTestConfiguration(destination="synapse", staging="filesystem", file_format="parquet", bucket_url=AZ_BUCKET, extra_info="az-integration") ] # add local filesystem destinations if requested diff --git a/tests/utils.py b/tests/utils.py index 823b1cca83..9f75017e6d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -29,7 +29,7 @@ # destination constants -IMPLEMENTED_DESTINATIONS = {"athena", "duckdb", "bigquery", "redshift", "postgres", "snowflake", "filesystem", "weaviate", "dummy", "motherduck", "mssql", "qdrant"} +IMPLEMENTED_DESTINATIONS = {"athena", "duckdb", "bigquery", "redshift", "postgres", "snowflake", "filesystem", "weaviate", "dummy", "motherduck", "mssql", "qdrant", "synapse"} NON_SQL_DESTINATIONS = {"filesystem", "weaviate", "dummy", "motherduck", "qdrant"} SQL_DESTINATIONS = IMPLEMENTED_DESTINATIONS - NON_SQL_DESTINATIONS @@ -38,7 +38,9 @@ # filter out active destinations for current tests -ACTIVE_DESTINATIONS = set(dlt.config.get("ACTIVE_DESTINATIONS", list) or IMPLEMENTED_DESTINATIONS) +# TODO remove filter +# ACTIVE_DESTINATIONS = set(dlt.config.get("ACTIVE_DESTINATIONS", list) or IMPLEMENTED_DESTINATIONS) +ACTIVE_DESTINATIONS = {"synapse"} ACTIVE_SQL_DESTINATIONS = SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS) ACTIVE_NON_SQL_DESTINATIONS = NON_SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS)