From bfd0b52848b0da361cb4df467991105ed0a18cec Mon Sep 17 00:00:00 2001
From: rudolfix
Date: Sun, 24 Nov 2024 00:54:03 +0100
Subject: [PATCH] azure account host docs (#2091)

* updates docs

* bumps to alpha 1.4.1a0

* fixes mypy linting
---
 .../docs/dlt-ecosystem/destinations/filesystem.md | 10 ++++------
 mypy.ini                                          |  6 ++++++
 pyproject.toml                                    |  2 +-
 tests/load/test_read_interfaces.py                | 12 ++++++------
 4 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
index 86970fe49d..0a41bb4c7b 100644
--- a/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
+++ b/docs/website/docs/dlt-ecosystem/destinations/filesystem.md
@@ -181,13 +181,14 @@ bucket_url = "abfss://<container_name>@<storage_account_name>.dfs.core.windows.n
 
 You can use `az`, `abfss`, `azure` and `abfs` url schemes.
 
-If you need to use a custom host for your account you can set it up like below:
+If you need to use a custom host for your storage account, you can set it up like below:
 ```toml
 [destination.filesystem.credentials]
 # The storage account name is always required
 azure_account_host = "<account_name>.<host>"
 ```
-Remember to include `storage_account_name` with your base host: `dlt_ci.blob.core.usgovcloudapi.net`.
+Remember to include `storage_account_name` with your base host, e.g. `dlt_ci.blob.core.usgovcloudapi.net`.
+`dlt` will use this host to connect to Azure Blob Storage without any modifications.
 
 Two forms of Azure credentials are supported:
 
@@ -209,14 +210,13 @@ If you have the correct Azure credentials set up on your machine (e.g., via Azur
 you can omit both `azure_storage_account_key` and `azure_storage_sas_token` and `dlt` will fall back to the available default.
 Note that `azure_storage_account_name` is still required as it can't be inferred from the environment.
 
-`dlt` supports the
-
 #### Service principal credentials
 
 Supply a client ID, client secret, and a tenant ID for a service principal authorized to access your container.
 
 ```toml
 [destination.filesystem.credentials]
+azure_storage_account_name = "account_name" # please set me up!
 azure_client_id = "client_id" # please set me up!
 azure_client_secret = "client_secret"
 azure_tenant_id = "tenant_id" # please set me up!
@@ -231,8 +231,6 @@ max_concurrency=3
 ```
 :::
 
-
-
 ### Local file system
 
 If for any reason you want to have those files in a local folder, set up the `bucket_url` as follows (you are free to use `config.toml` for that as there are no secrets required):
diff --git a/mypy.ini b/mypy.ini
index b32d13c199..eee4db6126 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -128,4 +128,10 @@ ignore_missing_imports = True
 ignore_missing_imports = True
 
 [mypy-snowflake.*]
+ignore_missing_imports = True
+
+[mypy-backports.*]
+ignore_missing_imports = True
+
+[mypy-time_machine.*]
 ignore_missing_imports = True
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 5532e44b28..a1a71a1a6a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dlt"
-version = "1.4.0"
+version = "1.4.1a0"
 description = "dlt is an open-source python-first scalable data loading library that does not require any backend to run."
 authors = ["dltHub Inc."]
 maintainers = [ "Marcin Rudolf", "Adrian Brudaru", "Anton Burnashev", "David Scharf" ]
diff --git a/tests/load/test_read_interfaces.py b/tests/load/test_read_interfaces.py
index 4e03630b18..f5a8d51baf 100644
--- a/tests/load/test_read_interfaces.py
+++ b/tests/load/test_read_interfaces.py
@@ -457,19 +457,19 @@ def test_schema_arg(populated_pipeline: Pipeline) -> None:
 
     # if there is no arg, the defautl schema is used
     dataset = populated_pipeline._dataset()
-    assert dataset.schema.name == populated_pipeline.default_schema_name  # type: ignore
-    assert "items" in dataset.schema.tables  # type: ignore
+    assert dataset.schema.name == populated_pipeline.default_schema_name
+    assert "items" in dataset.schema.tables
 
     # setting a different schema name will try to load that schema,
     # not find one and create an empty schema with that name
     dataset = populated_pipeline._dataset(schema="unknown_schema")
-    assert dataset.schema.name == "unknown_schema"  # type: ignore
-    assert "items" not in dataset.schema.tables  # type: ignore
+    assert dataset.schema.name == "unknown_schema"
+    assert "items" not in dataset.schema.tables
 
     # providing the schema name of the right schema will load it
     dataset = populated_pipeline._dataset(schema=populated_pipeline.default_schema_name)
-    assert dataset.schema.name == populated_pipeline.default_schema_name  # type: ignore
-    assert "items" in dataset.schema.tables  # type: ignore
+    assert dataset.schema.name == populated_pipeline.default_schema_name
+    assert "items" in dataset.schema.tables
 
 
 @pytest.mark.no_load