From d2d792796d973d9062eea34460c5b0956aaedc0d Mon Sep 17 00:00:00 2001
From: Marcin Rudolf
Date: Tue, 12 Sep 2023 09:00:10 +0200
Subject: [PATCH] updates weaviate doc

---
 .../dlt-ecosystem/destinations/weaviate.md    | 21 ++++++++++++++-----
 tests/conftest.py                             |  4 ++++
 .../bigquery/test_bigquery_table_builder.py   | 19 -------------------
 .../redshift/test_redshift_table_builder.py   |  9 ---------
 .../snowflake/test_snowflake_table_builder.py | 10 ----------
 5 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
index 6a64b920d8..919ed3cff5 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/weaviate.md
@@ -230,12 +230,23 @@ Here's a summary of the naming normalization approach:
 Reserved property names like `id` or `additional` are prefixed with underscores for differentiation. Therefore, `id` becomes `__id` and `_id` is rendered as `___id`.
 
 ### Case insensitive naming convention
-The default naming convention described above will preserve the casing of the properties (besides the first letter which is lowercased). This generates nice documents
-in Weaviate but also requires that your input data does not have clashing property names when comparing case insensitive ie. (`caseName` == `casename`). In such case
-Weaviate destination will fail.
-
-You can configure alternative naming convention
+The default naming convention described above will preserve the casing of the properties (besides the first letter, which is lowercased). This generates nice classes
+in Weaviate but also requires that your input data does not have property names that clash when compared case-insensitively, i.e. (`caseName` == `casename`). In such a case,
+the Weaviate destination will fail to create classes and report a conflict.
+You can configure an alternative naming convention which will lowercase all properties. Clashing properties will then be merged and the classes created. Still, if you have a document with clashing properties like:
+```json
+{"camelCase": 1, "CamelCase": 2}
+```
+it will be normalized to:
+```json
+{"camelcase": 2}
+```
+so your best course of action is to clean up the data yourself before loading and use the default naming convention. Nevertheless, you can configure the alternative in `config.toml`:
+```toml
+[schema]
+naming="dlt.destinations.weaviate.naming"
+```
 
 ## Additional destination options
@@ -282,4 +293,4 @@ Currently Weaviate destination does not support dbt.
 
 ### Syncing of `dlt` state
 
-Weaviate destination does not support syncing of the `dlt` state.
+Weaviate destination supports syncing of the `dlt` state.
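To make the merge behavior described in the new doc text concrete, here is a minimal, self-contained sketch of what lowercasing keys does to a clashing document. It only mimics the documented outcome; `lowercase_keys` is a made-up helper for illustration, not dlt's actual normalizer.

```python
# Minimal sketch of the clash described in the weaviate.md change above.
# `lowercase_keys` is a made-up helper, not dlt's normalizer.
def lowercase_keys(doc: dict) -> dict:
    normalized: dict = {}
    for key, value in doc.items():
        # later duplicates overwrite earlier ones, so "CamelCase" wins over "camelCase"
        normalized[key.lower()] = value
    return normalized

print(lowercase_keys({"camelCase": 1, "CamelCase": 2}))  # prints {'camelcase': 2}
```

This is why the doc recommends cleaning the data before loading: the merge silently drops one of the clashing values.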
diff --git a/tests/conftest.py b/tests/conftest.py
index d084e3f3af..16fd3999b3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -68,3 +68,7 @@ def _create_pipeline_instance_id(self) -> str:
     # disable snowflake logging
     for log in ["snowflake.connector.cursor", "snowflake.connector.connection"]:
         logging.getLogger(log).setLevel("ERROR")
+
+    # disable azure logging
+    for log in ["azure.core.pipeline.policies.http_logging_policy"]:
+        logging.getLogger(log).setLevel("ERROR")
diff --git a/tests/load/bigquery/test_bigquery_table_builder.py b/tests/load/bigquery/test_bigquery_table_builder.py
index 0ab691aa2f..9c2ab84904 100644
--- a/tests/load/bigquery/test_bigquery_table_builder.py
+++ b/tests/load/bigquery/test_bigquery_table_builder.py
@@ -106,22 +106,3 @@ def test_double_partition_exception(gcp_client: BigQueryClient) -> None:
         gcp_client._get_table_update_sql("event_test_table", mod_update, False)
     assert excc.value.columns == ["`col4`", "`col5`"]
 
-
-def test_partition_alter_table_exception(gcp_client: BigQueryClient) -> None:
-    mod_update = deepcopy(TABLE_UPDATE)
-    # timestamp
-    mod_update[3]["partition"] = True
-    # double partition
-    with pytest.raises(DestinationSchemaWillNotUpdate) as excc:
-        gcp_client._get_table_update_sql("event_test_table", mod_update, True)
-    assert excc.value.columns == ["`col4`"]
-
-
-def test_cluster_alter_table_exception(gcp_client: BigQueryClient) -> None:
-    mod_update = deepcopy(TABLE_UPDATE)
-    # timestamp
-    mod_update[3]["cluster"] = True
-    # double cluster
-    with pytest.raises(DestinationSchemaWillNotUpdate) as excc:
-        gcp_client._get_table_update_sql("event_test_table", mod_update, True)
-    assert excc.value.columns == ["`col4`"]
diff --git a/tests/load/redshift/test_redshift_table_builder.py b/tests/load/redshift/test_redshift_table_builder.py
index 16ef6f8a76..c991844679 100644
--- a/tests/load/redshift/test_redshift_table_builder.py
+++ b/tests/load/redshift/test_redshift_table_builder.py
@@ -90,12 +90,3 @@ def test_create_table_with_hints(client: RedshiftClient) -> None:
     # no hints
     assert '"col3" boolean NOT NULL' in sql
     assert '"col4" timestamp with time zone NOT NULL' in sql
-
-
-def test_hint_alter_table_exception(client: RedshiftClient) -> None:
-    mod_update = deepcopy(TABLE_UPDATE)
-    # timestamp
-    mod_update[3]["sort"] = True
-    with pytest.raises(DestinationSchemaWillNotUpdate) as excc:
-        client._get_table_update_sql("event_test_table", mod_update, True)
-    assert excc.value.columns == ['"col4"']
diff --git a/tests/load/snowflake/test_snowflake_table_builder.py b/tests/load/snowflake/test_snowflake_table_builder.py
index efbd478089..81164625f9 100644
--- a/tests/load/snowflake/test_snowflake_table_builder.py
+++ b/tests/load/snowflake/test_snowflake_table_builder.py
@@ -90,13 +90,3 @@ def test_create_table_with_partition_and_cluster(snowflake_client: SnowflakeClie
 
     # clustering must be the last
     assert sql.endswith('CLUSTER BY ("COL2","COL5")')
-
-
-def test_cluster_alter_table_exception(snowflake_client: SnowflakeClient) -> None:
-    mod_update = deepcopy(TABLE_UPDATE)
-    # timestamp
-    mod_update[3]["cluster"] = True
-    # double cluster
-    with pytest.raises(DestinationSchemaWillNotUpdate) as excc:
-        snowflake_client._get_table_update_sql("event_test_table", mod_update, True)
-    assert excc.value.columns == ['"COL4"']
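For context, the conftest.py hunk extends the pattern already used for Snowflake: raise the level of chatty third-party loggers so test output stays readable. A standalone sketch of that pattern, with the logger names taken from the hunk above:

```python
import logging

# Silence chatty third-party loggers during tests; the names come from
# the conftest.py hunk above. setLevel accepts level names as strings.
for noisy_logger in [
    "snowflake.connector.cursor",
    "snowflake.connector.connection",
    "azure.core.pipeline.policies.http_logging_policy",
]:
    logging.getLogger(noisy_logger).setLevel("ERROR")
```

Dropping a logger's level to ERROR suppresses its INFO/DEBUG chatter while still surfacing real failures.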