Commit 3e6118f

Merge branch 'devel' into feat/simple-rate-limiting

# Conflicts:
#	docs/website/docs/general-usage/source.md

sh-rp committed Dec 15, 2024
2 parents: 4eb730b + b8bac75
Showing 123 changed files with 3,955 additions and 1,356 deletions.
.github/workflows/test_destination_athena.yml (2 changes: 1 addition & 1 deletion)

@@ -67,7 +67,7 @@ jobs:

       - name: Install dependencies
         # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_athena_iceberg.yml (2 changes: 1 addition & 1 deletion)

@@ -67,7 +67,7 @@ jobs:

       - name: Install dependencies
         # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E athena --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_bigquery.yml (2 changes: 1 addition & 1 deletion)

@@ -66,7 +66,7 @@ jobs:

       - name: Install dependencies
         # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E bigquery --with providers -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_clickhouse.yml (2 changes: 1 addition & 1 deletion)

@@ -61,7 +61,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E clickhouse --with providers -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_databricks.yml (2 changes: 1 addition & 1 deletion)

@@ -64,7 +64,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E databricks -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_dremio.yml (2 changes: 1 addition & 1 deletion)

@@ -65,7 +65,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

       - run: |
           poetry run pytest tests/load --ignore tests/load/sources
.github/workflows/test_destination_motherduck.yml (2 changes: 1 addition & 1 deletion)

@@ -64,7 +64,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-motherduck

       - name: Install dependencies
-        run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E motherduck -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_mssql.yml (2 changes: 1 addition & 1 deletion)

@@ -69,7 +69,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E mssql -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_snowflake.yml (2 changes: 1 addition & 1 deletion)

@@ -64,7 +64,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E snowflake -E s3 -E gs -E az -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destination_synapse.yml (2 changes: 1 addition & 1 deletion)

@@ -67,7 +67,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

       - name: Install dependencies
-        run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline
+        run: poetry install --no-interaction -E synapse -E parquet --with sentry-sdk --with pipeline,ibis

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_destinations.yml (9 changes: 7 additions & 2 deletions)

@@ -77,8 +77,13 @@ jobs:
         # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

       - name: Install dependencies
         # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline -E deltalake
+        run: poetry install --no-interaction -E redshift -E postgis -E postgres -E gs -E s3 -E az -E parquet -E duckdb -E cli -E filesystem --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+
+      - name: enable certificates for azure and duckdb
+        run: sudo mkdir -p /etc/pki/tls/certs && sudo ln -s /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt
+
+      - name: Upgrade sqlalchemy
+        run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
.github/workflows/test_local_destinations.yml (5 changes: 4 additions & 1 deletion)

@@ -95,7 +95,10 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

       - name: Install dependencies
-        run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline -E deltalake
+        run: poetry install --no-interaction -E postgres -E postgis -E duckdb -E parquet -E filesystem -E cli -E weaviate -E qdrant -E sftp --with sentry-sdk --with pipeline,ibis -E deltalake -E pyiceberg
+
+      - name: Upgrade sqlalchemy
+        run: poetry run pip install sqlalchemy==2.0.18 # minimum version required by `pyiceberg`

       - name: Start SFTP server
         run: docker compose -f "tests/load/filesystem_sftp/docker-compose.yml" up -d
.github/workflows/test_sqlalchemy_destinations.yml (2 changes: 1 addition & 1 deletion)

@@ -86,7 +86,7 @@ jobs:
           key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-local-destinations

       - name: Install dependencies
-        run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"
+        run: poetry install --no-interaction -E parquet -E filesystem -E sqlalchemy -E cli --with sentry-sdk --with pipeline,ibis && poetry run pip install mysqlclient && poetry run pip install "sqlalchemy==${{ matrix.sqlalchemy }}"

       - name: create secrets.toml
         run: pwd && echo "$DLT_SECRETS_TOML" > tests/.dlt/secrets.toml
Makefile (5 changes: 2 additions & 3 deletions)

@@ -44,7 +44,7 @@ has-poetry:
 	poetry --version

 dev: has-poetry
-	poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk,airflow
+	poetry install --all-extras --with docs,providers,pipeline,sources,sentry-sdk

 lint:
 	./tools/check-package.sh

@@ -63,7 +63,6 @@ format:
 lint-snippets:
 	cd docs/tools && poetry run python check_embedded_snippets.py full

-
 lint-and-test-snippets: lint-snippets
 	poetry run mypy --config-file mypy.ini docs/website docs/tools --exclude docs/tools/lint_setup --exclude docs/website/docs_processed
 	poetry run flake8 --max-line-length=200 docs/website docs/tools --exclude docs/website/.dlt-repo

@@ -82,7 +81,7 @@ lint-security:
 	poetry run bandit -r dlt/ -n 3 -l

 test:
-	(set -a && . tests/.env && poetry run pytest tests)
+	poetry run pytest tests

 test-load-local:
 	DESTINATION__POSTGRES__CREDENTIALS=postgresql://loader:loader@localhost:5432/dlt_data DESTINATION__DUCKDB__CREDENTIALS=duckdb:///_storage/test_quack.duckdb poetry run pytest tests -k '(postgres or duckdb)'
dlt/__init__.py (2 changes: 0 additions & 2 deletions)

@@ -42,7 +42,6 @@
 )
 from dlt.pipeline import progress
 from dlt import destinations
-from dlt.destinations.dataset import dataset as _dataset

 pipeline = _pipeline
 current = _current

@@ -80,7 +79,6 @@
     "TCredentials",
     "sources",
     "destinations",
-    "_dataset",
 ]

 # verify that no injection context was created
dlt/cli/source_detection.py (3 changes: 1 addition & 2 deletions)

@@ -29,8 +29,7 @@ def find_call_arguments_to_replace(
         if not isinstance(dn_node, ast.Constant) or not isinstance(dn_node.value, str):
             raise CliCommandInnerException(
                 "init",
-                f"The pipeline script {init_script_name} must pass the {t_arg_name} as"
-                f" string to '{arg_name}' function in line {dn_node.lineno}",
+                f"The pipeline script {init_script_name} must pass the {t_arg_name} as string to '{arg_name}' function in line {dn_node.lineno}",  # type: ignore[attr-defined]
             )
         else:
             transformed_nodes.append((dn_node, ast.Constant(value=t_value, kind=None)))
dlt/common/configuration/providers/toml.py (6 changes: 6 additions & 0 deletions)

@@ -124,6 +124,12 @@ def _read_google_colab_secrets(self, name: str, file_name: str) -> tomlkit.TOMLDocument:
         """Try to load the toml from google colab userdata object"""
         try:
             from google.colab import userdata
+            from dlt.common.runtime.exec_info import is_notebook
+
+            # make sure we work in interactive mode (get_ipython() is available)
+            # when dlt cli is run, userdata is available but without a kernel
+            if not is_notebook():
+                return None

             try:
                 return tomlkit.loads(userdata.get(file_name))
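For reference, a rough sketch of the kind of kernel check such a guard relies on; this is a common IPython idiom and an assumption here, not the actual body of dlt.common.runtime.exec_info.is_notebook:

def is_notebook() -> bool:
    try:
        # get_ipython() exists only when an IPython kernel is attached;
        # a plain CLI invocation raises NameError instead
        return get_ipython().__class__.__name__ == "ZMQInteractiveShell"  # type: ignore[name-defined]
    except NameError:
        return False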
dlt/common/configuration/specs/aws_credentials.py (15 changes: 14 additions & 1 deletion)

@@ -8,6 +8,7 @@
     CredentialsWithDefault,
     configspec,
 )
+from dlt.common.configuration.specs.mixins import WithObjectStoreRsCredentials, WithPyicebergConfig
 from dlt.common.configuration.specs.exceptions import (
     InvalidBoto3Session,
     ObjectStoreRsCredentialsException,

@@ -16,7 +17,9 @@


 @configspec
-class AwsCredentialsWithoutDefaults(CredentialsConfiguration):
+class AwsCredentialsWithoutDefaults(
+    CredentialsConfiguration, WithObjectStoreRsCredentials, WithPyicebergConfig
+):
     # credentials without boto implementation
     aws_access_key_id: str = None
     aws_secret_access_key: TSecretStrValue = None

@@ -77,6 +80,16 @@ def to_object_store_rs_credentials(self) -> Dict[str, str]:

         return creds

+    def to_pyiceberg_fileio_config(self) -> Dict[str, Any]:
+        return {
+            "s3.access-key-id": self.aws_access_key_id,
+            "s3.secret-access-key": self.aws_secret_access_key,
+            "s3.session-token": self.aws_session_token,
+            "s3.region": self.region_name,
+            "s3.endpoint": self.endpoint_url,
+            "s3.connect-timeout": 300,
+        }
+

 @configspec
 class AwsCredentials(AwsCredentialsWithoutDefaults, CredentialsWithDefault):
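The s3.* keys above match pyiceberg's FileIO property names, so the returned dict can be merged into the properties passed to a pyiceberg catalog. A minimal usage sketch; the catalog name, the sqlite-backed catalog URI, and the credential values are hypothetical and not part of this commit:

from pyiceberg.catalog import load_catalog

from dlt.common.configuration.specs import AwsCredentialsWithoutDefaults

# hypothetical credential values, for illustration only
creds = AwsCredentialsWithoutDefaults(
    aws_access_key_id="AKIA...",
    aws_secret_access_key="...",
    region_name="eu-central-1",
)

# drop None-valued entries (e.g. session token, endpoint) before handing
# the config over; the sqlite catalog URI requires pyiceberg's sql extra
fileio = {k: v for k, v in creds.to_pyiceberg_fileio_config().items() if v is not None}
catalog = load_catalog("dlt_catalog", **{"uri": "sqlite:///iceberg_catalog.db", **fileio})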
dlt/common/configuration/specs/azure_credentials.py (22 changes: 19 additions & 3 deletions)

@@ -8,14 +8,15 @@
     CredentialsWithDefault,
     configspec,
 )
+from dlt.common.configuration.specs.mixins import WithObjectStoreRsCredentials, WithPyicebergConfig
 from dlt import version
 from dlt.common.utils import without_none

 _AZURE_STORAGE_EXTRA = f"{version.DLT_PKG_NAME}[az]"


 @configspec
-class AzureCredentialsBase(CredentialsConfiguration):
+class AzureCredentialsBase(CredentialsConfiguration, WithObjectStoreRsCredentials):
     azure_storage_account_name: str = None
     azure_account_host: Optional[str] = None
     """Alternative host when accessing blob storage endpoint ie. my_account.dfs.core.windows.net"""

@@ -32,7 +33,7 @@ def to_object_store_rs_credentials(self) -> Dict[str, str]:


 @configspec
-class AzureCredentialsWithoutDefaults(AzureCredentialsBase):
+class AzureCredentialsWithoutDefaults(AzureCredentialsBase, WithPyicebergConfig):
     """Credentials for Azure Blob Storage, compatible with adlfs"""

     azure_storage_account_key: Optional[TSecretStrValue] = None

@@ -49,6 +50,13 @@ def to_adlfs_credentials(self) -> Dict[str, Any]:
             account_host=self.azure_account_host,
         )

+    def to_pyiceberg_fileio_config(self) -> Dict[str, Any]:
+        return {
+            "adlfs.account-name": self.azure_storage_account_name,
+            "adlfs.account-key": self.azure_storage_account_key,
+            "adlfs.sas-token": self.azure_storage_sas_token,
+        }
+
     def create_sas_token(self) -> None:
         try:
             from azure.storage.blob import generate_account_sas, ResourceTypes

@@ -72,7 +80,7 @@ def on_partial(self) -> None:


 @configspec
-class AzureServicePrincipalCredentialsWithoutDefaults(AzureCredentialsBase):
+class AzureServicePrincipalCredentialsWithoutDefaults(AzureCredentialsBase, WithPyicebergConfig):
     azure_tenant_id: str = None
     azure_client_id: str = None
     azure_client_secret: TSecretStrValue = None

@@ -86,6 +94,14 @@ def to_adlfs_credentials(self) -> Dict[str, Any]:
             client_secret=self.azure_client_secret,
         )

+    def to_pyiceberg_fileio_config(self) -> Dict[str, Any]:
+        return {
+            "adlfs.account-name": self.azure_storage_account_name,
+            "adlfs.tenant-id": self.azure_tenant_id,
+            "adlfs.client-id": self.azure_client_id,
+            "adlfs.client-secret": self.azure_client_secret,
+        }
+

 @configspec
 class AzureCredentials(AzureCredentialsWithoutDefaults, CredentialsWithDefault):
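As with the AWS variant, the adlfs.* keys mirror the file-IO property names understood by the pinned pyiceberg version. A short sketch of what the service-principal variant produces; all values are hypothetical and the import path is assumed from the module shown above:

from dlt.common.configuration.specs import AzureServicePrincipalCredentialsWithoutDefaults

creds = AzureServicePrincipalCredentialsWithoutDefaults(
    azure_storage_account_name="myaccount",
    azure_tenant_id="00000000-0000-0000-0000-000000000000",
    azure_client_id="11111111-1111-1111-1111-111111111111",
    azure_client_secret="...",
)
print(creds.to_pyiceberg_fileio_config())
# {'adlfs.account-name': 'myaccount', 'adlfs.tenant-id': '00000000-...', ...}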
dlt/common/configuration/specs/base_configuration.py (2 changes: 1 addition & 1 deletion)

@@ -359,7 +359,7 @@ def _get_resolvable_dataclass_fields(cls) -> Iterator[TDtcField]:
     def get_resolvable_fields(cls) -> Dict[str, type]:
         """Returns a mapping of fields to their type hints. Dunders should not be resolved and are not returned"""
         return {
-            f.name: eval(f.type) if isinstance(f.type, str) else f.type  # type: ignore[arg-type]
+            f.name: eval(f.type) if isinstance(f.type, str) else f.type
             for f in cls._get_resolvable_dataclass_fields()
         }
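Context for the eval branch that the dropped type-ignore sat on: dataclass fields declared with string annotations (quoted, or under from __future__ import annotations) carry their type as a string, which must be eval'ed back to a type object. A minimal standalone illustration:

from dataclasses import dataclass, fields

@dataclass
class Example:
    count: "int"  # the annotation is stored as the string "int"

field = fields(Example)[0]
resolved = eval(field.type) if isinstance(field.type, str) else field.type
assert resolved is int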
dlt/common/configuration/specs/config_providers_context.py (7 changes: 1 addition & 6 deletions)

@@ -1,5 +1,4 @@
 import contextlib
-import dataclasses
 import io
 from typing import ClassVar, List

@@ -8,10 +7,6 @@
     ConfigProvider,
     ContextProvider,
 )
-from dlt.common.configuration.specs.base_configuration import (
-    ContainerInjectableContext,
-    NotResolved,
-)
 from dlt.common.configuration.specs import (
     GcpServiceAccountCredentials,
     BaseConfiguration,

@@ -137,7 +132,7 @@ def _airflow_providers() -> List[ConfigProvider]:
     # check if we are in task context and provide more info
     from airflow.operators.python import get_current_context  # noqa

-    ti: TaskInstance = get_current_context()["ti"]  # type: ignore
+    ti: TaskInstance = get_current_context()["ti"]  # type: ignore[assignment,unused-ignore]

     # log outside of stderr/out redirect
     if secrets_toml_var is None:
dlt/common/configuration/specs/exceptions.py (4 changes: 4 additions & 0 deletions)

@@ -72,3 +72,7 @@ def __init__(self, spec: Type[Any], native_value: Any):

 class ObjectStoreRsCredentialsException(ConfigurationException):
     pass
+
+
+class UnsupportedAuthenticationMethodException(ConfigurationException):
+    pass
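A plausible use for the new exception; the call site below is an assumption for illustration only, since this commit adds just the class:

from typing import Any, Dict

from dlt.common.configuration.specs.exceptions import (
    UnsupportedAuthenticationMethodException,
)

class DefaultCredentialsExample:
    """Hypothetical credentials class with no pyiceberg-compatible auth method."""

    def to_pyiceberg_fileio_config(self) -> Dict[str, Any]:
        # signal that these credentials cannot be translated to file-IO properties
        raise UnsupportedAuthenticationMethodException(
            "Default credentials cannot be translated to a pyiceberg file IO config"
        )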
(Diff truncated: the remaining changed files in this commit are not shown.)
