Skip to content

Commit

Permalink
Merge branch 'devel' into d#/data_contracts
Browse files Browse the repository at this point in the history
  • Loading branch information
rudolfix committed Nov 12, 2023
2 parents e980396 + 0843ff6 commit 5e2d131
Show file tree
Hide file tree
Showing 116 changed files with 5,255 additions and 300 deletions.
82 changes: 82 additions & 0 deletions .github/workflows/test_destination_qdrant.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
name: test Qdrant

on:
pull_request:
branches:
- master
- devel
workflow_dispatch:

env:
DESTINATION__QDRANT__CREDENTIALS__LOCATION: ${{ secrets.DESTINATION__QDRANT__CREDENTIALS__LOCATION }}
DESTINATION__QDRANT__CREDENTIALS__API_KEY: ${{ secrets.DESTINATION__QDRANT__CREDENTIALS__API_KEY }}

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
RUNTIME__LOG_LEVEL: ERROR

ACTIVE_DESTINATIONS: "[\"qdrant\"]"
ALL_FILESYSTEM_DRIVERS: "[\"memory\"]"

jobs:
get_docs_changes:
uses: ./.github/workflows/get_docs_changes.yml
if: ${{ !github.event.pull_request.head.repo.fork }}

run_loader:
name: Tests Qdrant loader
needs: get_docs_changes
if: needs.get_docs_changes.outputs.changes_outside_docs == 'true'
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
defaults:
run:
shell: bash
runs-on: ${{ matrix.os }}

steps:
- name: Check out
uses: actions/checkout@master

- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.10.x"

- name: Install Poetry
uses: snok/[email protected]
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-gcp

- name: Install dependencies
run: poetry install --no-interaction -E qdrant -E parquet
- run: |
poetry run pytest tests/load/
if: runner.os != 'Windows'
name: Run tests Linux/MAC
- run: |
poetry run pytest tests/load/
if: runner.os == 'Windows'
name: Run tests Windows
shell: cmd
matrix_job_required_check:
name: Qdrant loader tests
needs: run_loader
runs-on: ubuntu-latest
if: always()
steps:
- name: Check matrix job results
if: contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')
run: |
echo "One or more matrix job tests failed or were cancelled. You may need to re-run them." && exit 1
2 changes: 2 additions & 0 deletions .github/workflows/test_doc_snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ env:
SOURCES__ZENDESK__CREDENTIALS: ${{ secrets.ZENDESK__CREDENTIALS }}
# Slack hook for chess in production example
RUNTIME__SLACK_INCOMING_HOOK: ${{ secrets.RUNTIME__SLACK_INCOMING_HOOK }}
# Mongodb url for nested data example
MONGODB_PIPELINE__SOURCES__CONNECTION_URL: ${{ secrets.MONGODB_PIPELINE__SOURCES__CONNECTION_URL }}
jobs:

run_lint:
Expand Down
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -138,4 +138,7 @@ logs/

# temp
tmp
**/tmp
**/tmp

# Qdrant embedding models cache
local_cache/
24 changes: 24 additions & 0 deletions dlt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,27 @@

TCredentials = _CredentialsConfiguration
"When typing source/resource function arguments it indicates that a given argument represents credentials and should be taken from dlt.secrets. Credentials may be a string, dictionary or any other type."


__all__ = [
"__version__",
"config",
"secrets",
"state",
"Schema",
"source",
"resource",
"transformer",
"defer",
"pipeline",
"run",
"attach",
"Pipeline",
"dbt",
"progress",
"current",
"mark",
"TSecretValue",
"TCredentials",
"sources",
]
12 changes: 7 additions & 5 deletions dlt/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from dlt.common.arithmetics import Decimal # noqa: F401
from dlt.common.arithmetics import Decimal
from dlt.common.wei import Wei
from dlt.common.pendulum import pendulum # noqa: F401
from dlt.common.json import json # noqa: F401, I251
from dlt.common.runtime.signals import sleep # noqa: F401
from dlt.common.runtime import logger # noqa: F401
from dlt.common.pendulum import pendulum
from dlt.common.json import json
from dlt.common.runtime.signals import sleep
from dlt.common.runtime import logger

__all__ = ["Decimal", "Wei", "pendulum", "json", "sleep", "logger"]
28 changes: 23 additions & 5 deletions dlt/common/configuration/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
from .specs.base_configuration import configspec, is_valid_hint, is_secret_hint, resolve_type # noqa: F401
from .specs import known_sections # noqa: F401
from .resolve import resolve_configuration, inject_section # noqa: F401
from .inject import with_config, last_config, get_fun_spec # noqa: F401
from .specs.base_configuration import configspec, is_valid_hint, is_secret_hint, resolve_type
from .specs import known_sections
from .resolve import resolve_configuration, inject_section
from .inject import with_config, last_config, get_fun_spec

from .exceptions import ( # noqa: F401
from .exceptions import (
ConfigFieldMissingException,
ConfigValueCannotBeCoercedException,
ConfigFileNotFoundException,
ConfigurationValueError
)


__all__ = [
"configspec",
"is_valid_hint",
"is_secret_hint",
"resolve_type",
"known_sections",
"resolve_configuration",
"inject_section",
"with_config",
"last_config",
"get_fun_spec",
"ConfigFieldMissingException",
"ConfigValueCannotBeCoercedException",
"ConfigFileNotFoundException",
"ConfigurationValueError",
]
17 changes: 16 additions & 1 deletion dlt/common/configuration/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,19 @@
from .dictionary import DictionaryProvider
from .toml import SecretsTomlProvider, ConfigTomlProvider, TomlFileProvider, CONFIG_TOML, SECRETS_TOML, StringTomlProvider, SECRETS_TOML_KEY
from .google_secrets import GoogleSecretsProvider
from .context import ContextProvider
from .context import ContextProvider

__all__ = [
"ConfigProvider",
"EnvironProvider",
"DictionaryProvider",
"SecretsTomlProvider",
"ConfigTomlProvider",
"TomlFileProvider",
"CONFIG_TOML",
"SECRETS_TOML",
"StringTomlProvider",
"SECRETS_TOML_KEY",
"GoogleSecretsProvider",
"ContextProvider",
]
31 changes: 22 additions & 9 deletions dlt/common/configuration/specs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
from .run_configuration import RunConfiguration # noqa: F401
from .base_configuration import BaseConfiguration, CredentialsConfiguration, CredentialsWithDefault, ContainerInjectableContext, extract_inner_hint, is_base_configuration_inner_hint, configspec # noqa: F401
from .config_section_context import ConfigSectionContext # noqa: F401
from .run_configuration import RunConfiguration
from .base_configuration import BaseConfiguration, CredentialsConfiguration, CredentialsWithDefault, ContainerInjectableContext, extract_inner_hint, is_base_configuration_inner_hint, configspec
from .config_section_context import ConfigSectionContext

from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults, GcpServiceAccountCredentials, GcpOAuthCredentialsWithoutDefaults, GcpOAuthCredentials, GcpCredentials # noqa: F401
from .connection_string_credentials import ConnectionStringCredentials # noqa: F401
from .api_credentials import OAuth2Credentials # noqa: F401
from .aws_credentials import AwsCredentials, AwsCredentialsWithoutDefaults # noqa: F401
from .azure_credentials import AzureCredentials, AzureCredentialsWithoutDefaults # noqa: F401
from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults, GcpServiceAccountCredentials, GcpOAuthCredentialsWithoutDefaults, GcpOAuthCredentials, GcpCredentials
from .connection_string_credentials import ConnectionStringCredentials
from .api_credentials import OAuth2Credentials
from .aws_credentials import AwsCredentials, AwsCredentialsWithoutDefaults
from .azure_credentials import AzureCredentials, AzureCredentialsWithoutDefaults


# backward compatibility for service account credentials
from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults as GcpClientCredentials, GcpServiceAccountCredentials as GcpClientCredentialsWithDefault # noqa: F401
from .gcp_credentials import GcpServiceAccountCredentialsWithoutDefaults as GcpClientCredentials, GcpServiceAccountCredentials as GcpClientCredentialsWithDefault


__all__ = [
"RunConfiguration",
"BaseConfiguration", "CredentialsConfiguration", "CredentialsWithDefault", "ContainerInjectableContext", "extract_inner_hint", "is_base_configuration_inner_hint", "configspec",
"ConfigSectionContext",
"GcpServiceAccountCredentialsWithoutDefaults", "GcpServiceAccountCredentials", "GcpOAuthCredentialsWithoutDefaults", "GcpOAuthCredentials", "GcpCredentials",
"ConnectionStringCredentials",
"OAuth2Credentials",
"AwsCredentials", "AwsCredentialsWithoutDefaults",
"AzureCredentials", "AzureCredentialsWithoutDefaults",
"GcpClientCredentials", "GcpClientCredentialsWithDefault",
]
6 changes: 5 additions & 1 deletion dlt/common/data_types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from dlt.common.data_types.type_helpers import coerce_value, py_type_to_sc_type
from dlt.common.data_types.typing import TDataType, DATA_TYPES
from dlt.common.data_types.typing import TDataType, DATA_TYPES

__all__ = [
"coerce_value", "py_type_to_sc_type", "TDataType", "DATA_TYPES"
]
7 changes: 6 additions & 1 deletion dlt/common/data_writers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
from dlt.common.data_writers.writers import DataWriter, TLoaderFileFormat
from dlt.common.data_writers.buffered import BufferedDataWriter
from dlt.common.data_writers.escape import escape_redshift_literal, escape_redshift_identifier, escape_bigquery_identifier
from dlt.common.data_writers.escape import escape_redshift_literal, escape_redshift_identifier, escape_bigquery_identifier

__all__ = [
"DataWriter", "TLoaderFileFormat", "BufferedDataWriter",
"escape_redshift_literal", "escape_redshift_identifier", "escape_bigquery_identifier"
]
8 changes: 8 additions & 0 deletions dlt/common/destination/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
from dlt.common.destination.capabilities import DestinationCapabilitiesContext, TLoaderFileFormat, ALL_SUPPORTED_FILE_FORMATS
from dlt.common.destination.reference import DestinationReference, TDestinationReferenceArg

__all__ = [
"DestinationCapabilitiesContext",
"TLoaderFileFormat",
"ALL_SUPPORTED_FILE_FORMATS",
"DestinationReference",
"TDestinationReferenceArg",
]
11 changes: 11 additions & 0 deletions dlt/common/json/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,14 @@ def may_have_pua(line: bytes) -> bool:
except ImportError:
from dlt.common.json import _simplejson as _json_simple
json = _json_simple # type: ignore[assignment]


__all__ = [
"json",
"custom_encode",
"custom_pua_encode",
"custom_pua_decode",
"custom_pua_decode_nested",
"custom_pua_remove",
"SupportsJson"
]
8 changes: 7 additions & 1 deletion dlt/common/normalizers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
from dlt.common.normalizers.configuration import NormalizersConfiguration
from dlt.common.normalizers.typing import TJSONNormalizer, TNormalizersConfig
from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers
from dlt.common.normalizers.utils import explicit_normalizers, import_normalizers

__all__ = [
"NormalizersConfiguration",
"TJSONNormalizer", "TNormalizersConfig",
"explicit_normalizers", "import_normalizers"
]
9 changes: 9 additions & 0 deletions dlt/common/normalizers/json/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,12 @@ class SupportsDataItemNormalizer(Protocol):
def wrap_in_dict(item: Any) -> DictStrAny:
"""Wraps `item` that is not a dictionary into dictionary that can be json normalized"""
return {"value": item}


__all__ = [
"TNormalizedRowIterator",
"TNormalizerConfig",
"DataItemNormalizer",
"SupportsDataItemNormalizer",
"wrap_in_dict",
]
4 changes: 4 additions & 0 deletions dlt/common/normalizers/naming/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
from .naming import SupportsNamingConvention, NamingConvention

__all__ = [
'SupportsNamingConvention', "NamingConvention"
]

9 changes: 8 additions & 1 deletion dlt/common/runners/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from . import pool_runner
from .pool_runner import run_pool, NullExecutor
from .runnable import Runnable, workermethod, TExecutor
from .typing import TRunMetrics
from .venv import Venv, VenvNotFound


__all__ = [
"run_pool", "NullExecutor",
"Runnable", "workermethod", "TExecutor",
"TRunMetrics",
"Venv", "VenvNotFound"
]
4 changes: 3 additions & 1 deletion dlt/common/runtime/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .init import initialize_runtime
from .init import initialize_runtime

__all__ = ["initialize_runtime"]
14 changes: 10 additions & 4 deletions dlt/common/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
from dlt.common.schema.typing import TSchemaContractDict, TSchemaUpdate, TSchemaTables, TTableSchema, TStoredSchema, TTableSchemaColumns, TColumnHint, TColumnSchema, TColumnSchemaBase # noqa: F401
from dlt.common.schema.typing import COLUMN_HINTS # noqa: F401
from dlt.common.schema.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE # noqa: F401
from dlt.common.schema.utils import verify_schema_hash # noqa: F401
from dlt.common.schema.typing import TSchemaContractDict, TSchemaUpdate, TSchemaTables, TTableSchema, TStoredSchema, TTableSchemaColumns, TColumnHint, TColumnSchema, TColumnSchemaBase
from dlt.common.schema.typing import COLUMN_HINTS
from dlt.common.schema.schema import Schema, DEFAULT_SCHEMA_CONTRACT_MODE
from dlt.common.schema.utils import verify_schema_hash

__all__ = [
"TSchemaUpdate", "TSchemaTables", "TTableSchema", "TStoredSchema", "TTableSchemaColumns", "TColumnHint",
"TColumnSchema", "TColumnSchemaBase", "COLUMN_HINTS", "Schema", "verify_schema_hash", "TSchemaContractDict",
"DEFAULT_SCHEMA_CONTRACT_MODE"
]
31 changes: 22 additions & 9 deletions dlt/common/storages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
from .file_storage import FileStorage # noqa: F401
from .versioned_storage import VersionedStorage # noqa: F401
from .schema_storage import SchemaStorage # noqa: F401
from .live_schema_storage import LiveSchemaStorage # noqa: F401
from .normalize_storage import NormalizeStorage # noqa: F401
from .load_storage import LoadStorage # noqa: F401
from .data_item_storage import DataItemStorage # noqa: F401
from .configuration import LoadStorageConfiguration, NormalizeStorageConfiguration, SchemaStorageConfiguration, TSchemaFileFormat, FilesystemConfiguration # noqa: F401
from .fsspec_filesystem import fsspec_from_config, fsspec_filesystem # noqa: F401
from .file_storage import FileStorage
from .versioned_storage import VersionedStorage
from .schema_storage import SchemaStorage
from .live_schema_storage import LiveSchemaStorage
from .normalize_storage import NormalizeStorage
from .load_storage import LoadStorage
from .data_item_storage import DataItemStorage
from .configuration import LoadStorageConfiguration, NormalizeStorageConfiguration, SchemaStorageConfiguration, TSchemaFileFormat, FilesystemConfiguration
from .fsspec_filesystem import fsspec_from_config, fsspec_filesystem


__all__ = [
"FileStorage",
"VersionedStorage",
"SchemaStorage",
"LiveSchemaStorage",
"NormalizeStorage",
"LoadStorage",
"DataItemStorage",
"LoadStorageConfiguration", "NormalizeStorageConfiguration", "SchemaStorageConfiguration", "TSchemaFileFormat", "FilesystemConfiguration",
"fsspec_from_config", "fsspec_filesystem",
]
2 changes: 1 addition & 1 deletion dlt/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def flatten_list_of_str_or_dicts(seq: Sequence[Union[StrAny, str]]) -> DictStrAn
# return dicts


def flatten_list_or_items(_iter: Union[Iterator[TAny], Iterator[List[TAny]]]) -> Iterator[TAny]:
def flatten_list_or_items(_iter: Union[Iterable[TAny], Iterable[List[TAny]]]) -> Iterator[TAny]:
for items in _iter:
if isinstance(items, List):
yield from items
Expand Down
Loading

0 comments on commit 5e2d131

Please sign in to comment.