Skip to content

Commit

Permalink
synapse escape chars and test utils update (reset w/ devel)
Browse files Browse the repository at this point in the history
  • Loading branch information
eryanRM committed Nov 7, 2023
1 parent e92cf75 commit 027130d
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 2 deletions.
36 changes: 36 additions & 0 deletions dlt/common/data_writers/escape.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,42 @@ def escape_mssql_literal(v: Any) -> Any:
return str(v)


# TODO needs improvement for SQL injection, combine with mssql handling
def escape_synapse_literal(v: Any) -> Any:
    """Render a Python value as a literal for a Synapse SQL statement.

    Args:
        v: The value to render.

    Returns:
        ``"NULL"`` for ``None``; ``"1"``/``"0"`` for booleans; a single-quoted
        ``isoformat()`` string for datetime/date/time values; ``0x`` followed
        by the hex digits for bytes; the result of ``_escape_extended`` (called
        with the ``N'`` prefix and ``SYNAPSE_ESCAPE_DICT``) for strings and for
        JSON-serialized lists/dicts; ``str(v)`` for anything else.
    """
    if v is None:
        return "NULL"
    if isinstance(v, bool):
        # Booleans are emitted as the integers 1 / 0.
        return str(int(v))

    if isinstance(v, str):
        text = v
    elif isinstance(v, (datetime, date, time)):
        return "'" + v.isoformat() + "'"
    elif isinstance(v, (list, dict)):
        # Containers are serialized to JSON first, then escaped like any string.
        text = json.dumps(v)
    elif isinstance(v, bytes):
        return "0x" + v.hex()
    else:
        return str(v)

    # NOTE(review): SYNAPSE_SQL_ESCAPE_RE is not passed to the helper here --
    # confirm which pattern _escape_extended applies together with this dict.
    return _escape_extended(text, prefix="N'", escape_dict=SYNAPSE_ESCAPE_DICT)


# TODO potentially combine with mssql
# Maps each special character to its replacement inside an N'...' literal.
# A single quote is doubled. Newline, carriage return and tab are replaced by
# text that closes the literal, concatenates CHAR(<code point>), and reopens
# an N' literal. Backslash maps to itself (presumably so a pattern that also
# matches backslashes still finds a key -- confirm against _escape_extended).
SYNAPSE_ESCAPE_DICT = {
    "'": "''",
    **{chr(code): f"' + CHAR({code}) + N'" for code in (10, 13, 9)},
    "\\": "\\",
}

# Escape pattern built from SYNAPSE_ESCAPE_DICT by _make_sql_escape_re
# (presumably a compiled regex matching the dict's keys -- confirm in the helper).
# NOTE(review): nothing visible in this module passes this pattern on to
# _escape_extended -- confirm it is actually used.
SYNAPSE_SQL_ESCAPE_RE = _make_sql_escape_re(SYNAPSE_ESCAPE_DICT)


def escape_synapse_identifier(v: str) -> str:
    """Quote ``v`` as a double-quoted (delimited) SQL identifier.

    An embedded double quote is escaped by doubling it rather than being
    removed, so the identifier keeps its exact name and two names that differ
    only by a quote character cannot collapse into the same identifier.

    Args:
        v: The raw identifier.

    Returns:
        The identifier wrapped in double quotes with inner quotes doubled.
    """
    return '"' + v.replace('"', '""') + '"'


def escape_redshift_identifier(v: str) -> str:
    """Quote ``v`` as a double-quoted identifier for Redshift.

    Double quotes are doubled first, then backslashes are doubled, and the
    result is wrapped in double quotes.
    """
    quotes_doubled = v.replace('"', '""')
    backslashes_doubled = quotes_doubled.replace("\\", "\\\\")
    return f'"{backslashes_doubled}"'

Expand Down
2 changes: 2 additions & 0 deletions tests/load/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def destinations_configs(
DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, stage_name="PUBLIC.dlt_s3_stage", extra_info="s3-integration"),
DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, stage_name="PUBLIC.dlt_az_stage", extra_info="az-integration"),
DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="jsonl", bucket_url=AZ_BUCKET, extra_info="az-authorization"),
DestinationTestConfiguration(destination="synapse", staging="filesystem", file_format="parquet", bucket_url=AZ_BUCKET, stage_name="PUBLIC.dlt_az_stage")
]

if all_staging_configs:
Expand All @@ -155,6 +156,7 @@ def destinations_configs(
DestinationTestConfiguration(destination="snowflake", staging="filesystem", file_format="parquet", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"),
DestinationTestConfiguration(destination="redshift", staging="filesystem", file_format="jsonl", bucket_url=AWS_BUCKET, extra_info="credential-forwarding"),
DestinationTestConfiguration(destination="bigquery", staging="filesystem", file_format="jsonl", bucket_url=GCS_BUCKET, extra_info="gcs-authorization"),
DestinationTestConfiguration(destination="synapse", staging="filesystem", file_format="parquet", bucket_url=AZ_BUCKET, extra_info="az-integration")
]

# add local filesystem destinations if requested
Expand Down
6 changes: 4 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@


# destination constants
# NOTE(review): the two IMPLEMENTED_DESTINATIONS assignments below are the
# before/after sides of this diff hunk; they differ only by the added
# "synapse" entry, and the second assignment is the one that takes effect.
IMPLEMENTED_DESTINATIONS = {"athena", "duckdb", "bigquery", "redshift", "postgres", "snowflake", "filesystem", "weaviate", "dummy", "motherduck", "mssql", "qdrant"}
IMPLEMENTED_DESTINATIONS = {"athena", "duckdb", "bigquery", "redshift", "postgres", "snowflake", "filesystem", "weaviate", "dummy", "motherduck", "mssql", "qdrant", "synapse"}
# Destinations that are excluded from the SQL set below.
NON_SQL_DESTINATIONS = {"filesystem", "weaviate", "dummy", "motherduck", "qdrant"}
# SQL destinations: every implemented destination not listed as non-SQL.
SQL_DESTINATIONS = IMPLEMENTED_DESTINATIONS - NON_SQL_DESTINATIONS

Expand All @@ -38,7 +38,9 @@


# filter out active destinations for current tests
# The active set comes from the ACTIVE_DESTINATIONS config value; whenever that
# lookup returns a falsy value, every implemented destination is active.
# Hard-coding a single destination here would silently disable the tests of all
# other destinations, so restrict the set through configuration instead.
ACTIVE_DESTINATIONS = set(dlt.config.get("ACTIVE_DESTINATIONS", list) or IMPLEMENTED_DESTINATIONS)

# Split the active destinations into their SQL and non-SQL subsets.
ACTIVE_SQL_DESTINATIONS = SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS)
ACTIVE_NON_SQL_DESTINATIONS = NON_SQL_DESTINATIONS.intersection(ACTIVE_DESTINATIONS)
Expand Down

0 comments on commit 027130d

Please sign in to comment.