Skip to content

Commit

Permalink
Merge pull request #866 from dlt-hub/devel
Browse files Browse the repository at this point in the history
0.4.2 release
  • Loading branch information
burnash authored Dec 29, 2023
2 parents 84816c5 + 2c1dbde commit 3d13835
Show file tree
Hide file tree
Showing 34 changed files with 538 additions and 185 deletions.
10 changes: 6 additions & 4 deletions .github/workflows/test_doc_snippets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ on:
workflow_dispatch:

env:
DLT_SECRETS_TOML: ${{ secrets.DLT_SECRETS_TOML }}

DESTINATION__DUCKDB__CREDENTIALS: duckdb:///_storage/test_quack.duckdb

RUNTIME__SENTRY_DSN: https://[email protected]/4504819859914752
Expand All @@ -18,12 +20,9 @@ env:
DESTINATION__WEAVIATE__VECTORIZER: text2vec-contextionary
DESTINATION__WEAVIATE__MODULE_CONFIG: "{\"text2vec-contextionary\": {\"vectorizeClassName\": false, \"vectorizePropertyName\": true}}"

# zendesk vars for example
SOURCES__ZENDESK__CREDENTIALS: ${{ secrets.ZENDESK__CREDENTIALS }}
# Slack hook for chess in production example
RUNTIME__SLACK_INCOMING_HOOK: ${{ secrets.RUNTIME__SLACK_INCOMING_HOOK }}
# Mongodb url for nested data example
MONGODB_PIPELINE__SOURCES__CONNECTION_URL: ${{ secrets.MONGODB_PIPELINE__SOURCES__CONNECTION_URL }}

# Qdrant credentials
DESTINATION__QDRANT__CREDENTIALS__LOCATION: ${{ secrets.DESTINATION__QDRANT__CREDENTIALS__LOCATION }}
DESTINATION__QDRANT__CREDENTIALS__API_KEY: ${{ secrets.DESTINATION__QDRANT__CREDENTIALS__API_KEY }}
Expand Down Expand Up @@ -65,6 +64,9 @@ jobs:
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E duckdb -E weaviate -E parquet -E qdrant --with docs,sentry-sdk --without airflow

- name: create secrets.toml
run: pwd && echo "$DLT_SECRETS_TOML" > docs/website/docs/.dlt/secrets.toml

- name: Run linter and tests
run: make test-and-lint-snippets

8 changes: 7 additions & 1 deletion dlt/common/destination/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from importlib import import_module
from types import TracebackType
from typing import (
Callable,
ClassVar,
Optional,
NamedTuple,
Expand Down Expand Up @@ -418,7 +419,7 @@ def should_truncate_table_before_load_on_staging_destination(self, table: TTable
return True


TDestinationReferenceArg = Union[str, "Destination", None]
TDestinationReferenceArg = Union[str, "Destination", Callable[..., "Destination"], None]


class Destination(ABC, Generic[TDestinationConfig, TDestinationClient]):
Expand Down Expand Up @@ -485,6 +486,8 @@ def to_name(ref: TDestinationReferenceArg) -> str:
raise InvalidDestinationReference(ref)
if isinstance(ref, str):
return ref.rsplit(".", 1)[-1]
if callable(ref):
ref = ref()
return ref.destination_name

@staticmethod
Expand Down Expand Up @@ -515,6 +518,9 @@ def from_reference(
ref = destination_name
if ref is None:
return None
# evaluate callable returning Destination
if callable(ref):
ref = ref()
if isinstance(ref, Destination):
if credentials or destination_name or environment:
logger.warning(
Expand Down
6 changes: 6 additions & 0 deletions dlt/destinations/adapters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""This module collects all destination adapters present in `impl` namespace"""

from dlt.destinations.impl.weaviate import weaviate_adapter
from dlt.destinations.impl.qdrant import qdrant_adapter

__all__ = ["weaviate_adapter", "qdrant_adapter"]
2 changes: 1 addition & 1 deletion dlt/destinations/impl/mssql/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def to_db_integer_type(
def from_db_type(
self, db_type: str, precision: Optional[int], scale: Optional[int]
) -> TColumnType:
if db_type == "numeric":
if db_type == "decimal":
if (precision, scale) == self.capabilities.wei_precision:
return dict(data_type="wei")
return super().from_db_type(db_type, precision, scale)
Expand Down
4 changes: 4 additions & 0 deletions dlt/sources/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
GcpServiceAccountCredentials,
GcpOAuthCredentials,
GcpCredentials,
AwsCredentials,
AzureCredentials,
)
from dlt.common.configuration.specs import ConnectionStringCredentials
from dlt.common.configuration.specs import OAuth2Credentials
Expand All @@ -13,6 +15,8 @@
"GcpServiceAccountCredentials",
"GcpOAuthCredentials",
"GcpCredentials",
"AwsCredentials",
"AzureCredentials",
"ConnectionStringCredentials",
"OAuth2Credentials",
"CredentialsConfiguration",
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/nested_data/.dlt/example.secrets.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[mongodb_pipeline.sources]
[sources.mongodb]
connection_url=""
2 changes: 1 addition & 1 deletion docs/examples/qdrant_zendesk/qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.impl.qdrant import qdrant_adapter
from dlt.destinations.adapters import qdrant_adapter
from qdrant_client import QdrantClient

from dlt.common.configuration.inject import with_config
Expand Down
2 changes: 1 addition & 1 deletion docs/website/blog/2023-09-26-verba-dlt-zendesk.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ import itertools

import dlt
from weaviate.util import generate_uuid5
from dlt.destinations.weaviate import weaviate_adapter
from dlt.destinations.adapters import weaviate_adapter

from zendesk import zendesk_support

Expand Down
25 changes: 12 additions & 13 deletions docs/website/docs/conftest.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
import os
import pytest
from typing import List
from unittest.mock import patch

from dlt.common.utils import set_working_dir
from dlt.common.configuration.container import Container

# patch which providers to enable
from dlt.common.configuration.providers import (
StringTomlProvider,
ConfigTomlProvider,
EnvironProvider,
SecretsTomlProvider,
ConfigTomlProvider,
StringTomlProvider,
)
from dlt.common.configuration.specs.config_providers_context import (
ConfigProvidersContext,
ConfigProvidersConfiguration,
)
from dlt.common.utils import set_working_dir

from tests.utils import (
patch_home_dir,
Expand All @@ -28,16 +24,17 @@


@pytest.fixture(autouse=True)
def setup_tests(request):
# always set working dir to main website folder
def setup_secret_providers(request):
"""Creates set of config providers where tomls are loaded from tests/.dlt"""
secret_dir = "./.dlt"
dname = os.path.dirname(request.module.__file__)
config_dir = dname + "/.dlt"

# inject provider context so the original providers are restored at the end
def _initial_providers():
return [
EnvironProvider(),
SecretsTomlProvider(project_dir=config_dir, add_global_config=False),
SecretsTomlProvider(project_dir=secret_dir, add_global_config=False),
ConfigTomlProvider(project_dir=config_dir, add_global_config=False),
]

Expand All @@ -48,11 +45,13 @@ def _initial_providers():
"dlt.common.configuration.specs.config_providers_context.ConfigProvidersContext.initial_providers",
_initial_providers,
):
# extras work when container updated
glob_ctx.add_extras()
yield


def pytest_configure(config):
# push sentry to ci
os.environ["RUNTIME__SENTRY_DSN"] = (
"https://[email protected]/4504819859914752"
)
os.environ[
"RUNTIME__SENTRY_DSN"
] = "https://[email protected]/4504819859914752"
2 changes: 1 addition & 1 deletion docs/website/docs/dlt-ecosystem/destinations/qdrant.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ If no configuration options are provided, the default fallback will be `http://l

```python
import dlt
from dlt.destinations.impl.qdrant import qdrant_adapter
from dlt.destinations.adapters import qdrant_adapter

movies = [
{
Expand Down
2 changes: 1 addition & 1 deletion docs/website/docs/dlt-ecosystem/destinations/weaviate.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ The `url` will default to **http://localhost:8080** and `api_key` is not defined

```python
import dlt
from dlt.destinations.weaviate import weaviate_adapter
from dlt.destinations.adapters import weaviate_adapter

movies = [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ print(f"Job run triggered successfully. Run ID: {job_run_id}")

# Example: Get run status
run_status = client.get_run_status(run_id=job_run_id)
print(f"Job run status: {run_status["status_humanized"]}")
print(f"Job run status: {run_status['status_humanized']}")
```

## Helper functions
Expand Down
42 changes: 36 additions & 6 deletions docs/website/docs/dlt-ecosystem/verified-sources/hubspot.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ web analytics events data into the destination.
```python
@dlt.source(name="hubspot")
def hubspot(
api_key: str = dlt.secrets.value, include_history: bool = False
api_key: str = dlt.secrets.value,
include_history: bool = False,
) -> Sequence[DltResource]:
```

Expand All @@ -169,14 +170,26 @@ the destination, replacing any existing data.
```python
@dlt.resource(name="companies", write_disposition="replace")
def companies(
api_key: str = dlt.secrets.value, include_history: bool = False
) -> Iterator[TDataItems]:
"""Hubspot companies resource"""
yield from crm_objects("company", api_key, include_history=False)
api_key: str = api_key,
include_history: bool = include_history,
props: Sequence[str] = DEFAULT_COMPANY_PROPS,
include_custom_props: bool = True,
) -> Iterator[TDataItems]:
"""Hubspot companies resource"""
yield from crm_objects(
"company",
api_key,
include_history=include_history,
props=props,
include_custom_props=include_custom_props,
)
```

This resource function takes the same arguments, `api_key` and `include_history` as the "hubspot"
source described [above](hubspot.md#source-hubspot). Similar to this, resource functions "contacts",
source described [above](hubspot.md#source-hubspot), but also supports two additional arguments:
`include_custom_props` - indicates if all the properties of CRM objects, except Hubspot driven
(prefixed with `hs_`), are to be extracted; `props` - the list of properties to extract
in addition to the custom properties. Similar to this, resource functions "contacts",
"deals", "tickets", "products", and "quotes" retrieve data from the Hubspot API.

### Resource `hubspot_events_for_objects`
Expand Down Expand Up @@ -241,6 +254,23 @@ verified source.
```
1. `include_history` loads property change history and entities as separate tables. By default set as False.

1. By default, all the custom properties of a CRM object are extracted. If you want only particular fields,
set the flag `include_custom_props=False` and add a list of properties with the `props` arg.

```python
load_data = hubspot()
load_data.contacts.bind(props=["date_of_birth", "degree"], include_custom_props=False)
load_info = pipeline.run(load_data.with_resources("contacts"))
```

1. To read all the custom properties of CRM objects plus some additional (e.g. Hubspot driven) properties, pass only the `props` arg:

```python
load_data = hubspot()
load_data.contacts.bind(props=["hs_content_membership_email", "hs_content_membership_email_confirmed"])
load_info = pipeline.run(load_data.with_resources("contacts"))
```


1. To load the web analytics events of a given object type.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ def load_data_with_retry(pipeline, data):
load_info = pipeline.run(data)
logger.info(str(load_info))

# raise on failed jobs
load_info.raise_on_failed_jobs()
# send notification
send_slack_message(
pipeline.runtime_config.slack_incoming_hook, "Data was successfully loaded!"
)
# raise on failed jobs
load_info.raise_on_failed_jobs()
# send notification
send_slack_message(
pipeline.runtime_config.slack_incoming_hook, "Data was successfully loaded!"
)
except Exception:
# we get here after all the failed retries
# send notification
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# @@@DLT_SNIPPET_START example
[mongodb_pipeline.sources]
[sources.mongodb]
connection_url=""
# @@@DLT_SNIPPET_END example
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from tests.utils import skipifgithubfork

__source_name__ = "mongodb"


@skipifgithubfork
def nested_data_snippet() -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def qdrant_snippet():
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.impl.qdrant import qdrant_adapter
from dlt.destinations.adapters import qdrant_adapter
from qdrant_client import QdrantClient

from dlt.common.configuration.inject import with_config
Expand Down
2 changes: 1 addition & 1 deletion docs/website/docs/examples/qdrant_zendesk/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ from dlt.common import pendulum
from dlt.common.time import ensure_pendulum_datetime
from dlt.common.typing import TAnyDateTime
from dlt.sources.helpers.requests import client
from dlt.destinations.impl.qdrant import qdrant_adapter
from dlt.destinations.adapters import qdrant_adapter
from qdrant_client import QdrantClient

from dlt.common.configuration.inject import with_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,11 @@ def google_sheets(
credentials=dlt.secrets.value,
only_strings=False
):
sheets = build('sheets', 'v4', credentials=Services.from_json(credentials))
tabs = []
for tab_name in tab_names:
data = sheets.get(spreadsheet_id, tab_name).execute().values()
tabs.append(dlt.resource(data, name=tab_name))
return tabs
...
```

In case of `google_sheets()` it will look
for: `spreadsheet_id`, `tab_names` and `credentials`.
for: `spreadsheet_id`, `tab_names`, `credentials` and `only_strings`.

Each provider has its own key naming convention, and dlt is able to translate between them.

Expand Down
Loading

0 comments on commit 3d13835

Please sign in to comment.