-
Notifications
You must be signed in to change notification settings - Fork 186
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #660 from dlt-hub/rfix/duck-case
Rfix/duck case
- Loading branch information
Showing 19 changed files with 166 additions and 63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,24 @@ | ||
import re | ||
from functools import lru_cache | ||
|
||
from dlt.common.normalizers.naming.snake_case import NamingConvention as BaseNamingConvention | ||
from dlt.common.normalizers.naming.snake_case import NamingConvention as SnakeCaseNamingConvention | ||
|
||
|
||
class NamingConvention(BaseNamingConvention): | ||
class NamingConvention(SnakeCaseNamingConvention): | ||
|
||
_RE_NON_ALPHANUMERIC = re.compile(r"[^a-zA-Z\d_+-]+") | ||
_REDUCE_ALPHABET = ("*@|", "xal") | ||
_TR_REDUCE_ALPHABET = str.maketrans(_REDUCE_ALPHABET[0], _REDUCE_ALPHABET[1]) | ||
_CLEANUP_TABLE = str.maketrans("\n\r\"", "___") | ||
_RE_LEADING_DIGITS = None # do not remove leading digits | ||
|
||
@staticmethod | ||
@lru_cache(maxsize=None) | ||
def _normalize_identifier(identifier: str, max_length: int) -> str: | ||
"""Normalizes the identifier according to naming convention represented by this function""" | ||
# all characters that are not letters digits or a few special chars are replaced with underscore | ||
normalized_ident = identifier.translate(NamingConvention._TR_REDUCE_ALPHABET) | ||
normalized_ident = NamingConvention._RE_NON_ALPHANUMERIC.sub("_", normalized_ident) | ||
|
||
normalized_ident = identifier.translate(NamingConvention._CLEANUP_TABLE) | ||
|
||
# shorten identifier | ||
return NamingConvention.shorten_identifier( | ||
NamingConvention._to_snake_case(normalized_ident), | ||
NamingConvention._RE_UNDERSCORES.sub("_", normalized_ident), | ||
identifier, | ||
max_length | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import pytest | ||
import os | ||
|
||
import dlt | ||
from dlt.destinations.exceptions import DatabaseTerminalException | ||
from dlt.pipeline.exceptions import PipelineStepFailed | ||
|
||
from tests.pipeline.utils import airtable_emojis | ||
from tests.load.pipeline.utils import destinations_configs, DestinationTestConfiguration, load_table_counts | ||
|
||
|
||
@pytest.mark.parametrize("destination_config", destinations_configs(default_sql_configs=True, subset=["duckdb"]), ids=lambda x: x.name) | ||
def test_duck_case_names(destination_config: DestinationTestConfiguration) -> None: | ||
# we want to have nice tables | ||
# dlt.config["schema.naming"] = "duck_case" | ||
os.environ["SCHEMA__NAMING"] = "duck_case" | ||
pipeline = destination_config.setup_pipeline("test_duck_case_names") | ||
# create tables and columns with emojis and other special characters | ||
pipeline.run(airtable_emojis().with_resources("📆 Schedule", "🦚Peacock", "🦚WidePeacock")) | ||
pipeline.run([{"🐾Feet": 2, "1+1": "two", "\nhey": "value"}], table_name="🦚Peacocks🦚") | ||
table_counts = load_table_counts(pipeline, *[t["name"] for t in pipeline.default_schema.data_tables()]) | ||
assert table_counts == { | ||
"📆 Schedule": 3, | ||
"🦚Peacock": 1, | ||
'🦚Peacock__peacock': 3, | ||
'🦚Peacocks🦚': 1, | ||
'🦚WidePeacock': 1, | ||
'🦚WidePeacock__peacock': 3 | ||
} | ||
|
||
# this will fail - duckdb preserves case but is case insensitive when comparing identifiers | ||
with pytest.raises(PipelineStepFailed) as pip_ex: | ||
pipeline.run([{"🐾Feet": 2, "1+1": "two", "🐾feet": "value"}], table_name="🦚peacocks🦚") | ||
assert isinstance(pip_ex.value.__context__, DatabaseTerminalException) | ||
|
||
# show tables and columns | ||
with pipeline.sql_client() as client: | ||
with client.execute_query("DESCRIBE 🦚peacocks🦚;") as q: | ||
tables = q.df() | ||
assert tables["column_name"].tolist() == ["🐾Feet", "1+1", "hey", "_dlt_load_id", "_dlt_id"] | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.