From cc9a486cc75441e9be4f44c73d5d212ee2358c3c Mon Sep 17 00:00:00 2001 From: Marcin Rudolf Date: Mon, 23 Oct 2023 21:01:16 +0200 Subject: [PATCH] fixes tests --- .github/workflows/test_doc_snippets.yml | 2 +- docs/website/docs/general-usage/schema.md | 15 ++++++++------- tests/common/schema/test_inference.py | 3 ++- tests/load/filesystem/test_aws_credentials.py | 1 + 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index b2a2f241db..e158c2d669 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -56,7 +56,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E duckdb -E weaviate --with docs --without airflow + run: poetry install --no-interaction -E duckdb -E weaviate -E parquet --with docs --without airflow - name: Run linter and tests run: make test-and-lint-snippets diff --git a/docs/website/docs/general-usage/schema.md b/docs/website/docs/general-usage/schema.md index e27a87e803..ee73aea54e 100644 --- a/docs/website/docs/general-usage/schema.md +++ b/docs/website/docs/general-usage/schema.md @@ -243,12 +243,12 @@ data itself. The `dlt.source` decorator accepts a schema instance that you can create yourself and modify in whatever way you wish. The decorator also support a few typical use cases: -### 1. Schema created implicitly by decorator +### Schema created implicitly by decorator If no schema instance is passed, the decorator creates a schema with the name set to source name and all the settings to default. -### 2. 
Automatically load schema file stored with source python module +### Automatically load schema file stored with source python module If no schema instance is passed, and a file with a name `{source name}_schema.yml` exists in the same folder as the module with the decorated function, it will be automatically loaded and used as @@ -256,7 +256,7 @@ the schema. This should make easier to bundle a fully specified (or pre-configured) schema with a source. -### 3. Schema is modified in the source function body +### Schema is modified in the source function body What if you can configure your schema or add some tables only inside your schema function, when i.e. you have the source credentials and user settings available? You could for example add detailed @@ -264,7 +264,7 @@ schemas of all the database tables when someone requests a table data to be load is available only at the moment source function is called. Similarly to the `source_state()` and `resource_state()` , source and resource function has current -schema available via `dlt.current.source_schema`. +schema available via `dlt.current.source_schema()`. Example: @@ -273,9 +273,10 @@ Example: def textual(nesting_level: int): # get the source schema from the `current` context schema = dlt.current.source_schema() - # remove date detector and add type detector that forces all fields to strings - schema._settings["detections"].remove("iso_timestamp") - schema._settings["detections"].insert(0, "all_text") + # remove date detector + schema.remove_type_detection("iso_timestamp") + # convert UNIX timestamp (float, within a year from NOW) into timestamp + schema.add_type_detection("timestamp") schema.compile_settings() return dlt.resource(...) 
diff --git a/tests/common/schema/test_inference.py b/tests/common/schema/test_inference.py index be58adab53..24c97219fc 100644 --- a/tests/common/schema/test_inference.py +++ b/tests/common/schema/test_inference.py @@ -372,7 +372,8 @@ def test_corece_null_value_over_not_null(schema: Schema) -> None: def test_infer_with_autodetection(schema: Schema) -> None: - c = schema._infer_column("ts", pendulum.now().timestamp()) + # iso timestamp detection + c = schema._infer_column("ts", pendulum.now().isoformat()) assert c["data_type"] == "timestamp" schema._type_detections = [] c = schema._infer_column("ts", pendulum.now().timestamp()) diff --git a/tests/load/filesystem/test_aws_credentials.py b/tests/load/filesystem/test_aws_credentials.py index 08bd341049..b4f367b4e1 100644 --- a/tests/load/filesystem/test_aws_credentials.py +++ b/tests/load/filesystem/test_aws_credentials.py @@ -58,6 +58,7 @@ def test_aws_credentials_from_botocore(environment: Dict[str, str]) -> None: "secret": "fake_secret_key", "token": "fake_session_token", "profile": None, + "endpoint_url": None, "client_kwargs": { "region_name": session.get_config_variable('region') }