Skip to content

Commit

Permalink
Masks secrets in traces. (#1797)
Browse files Browse the repository at this point in the history
* Masks secrets in traces.

* tests that secrets are masked in stringified trace

* generates secrets in deployments from dlt.secrets provider instead of pipeline trace

* corrects masking and looks up secret value in dlt.secrets

* removes secret masking and replaces credentials with None.

* fixes deploy help when deploy type missing

* fixes always_choose restore defaults in echo

* tests deploy command with and without secrets

* fixes dumping secret vals for toml in deploy

---------

Co-authored-by: Marcin Rudolf <[email protected]>
  • Loading branch information
willi-mueller and rudolfix authored Sep 14, 2024
1 parent 5b92fea commit 1714801
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 66 deletions.
18 changes: 11 additions & 7 deletions dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,13 +617,17 @@ def main() -> int:
elif args.command == "deploy":
try:
deploy_args = vars(args)
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args,
)
if deploy_args.get("deployment_method") is None:
print_help(deploy_cmd)
return -1
else:
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args,
)
except (NameError, KeyError):
fmt.warning(
"Please install additional command line dependencies to use deploy command:"
Expand Down
9 changes: 2 additions & 7 deletions dlt/cli/deploy_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum
from importlib.metadata import version as pkg_version

from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY, StringTomlProvider
from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY
from dlt.common.configuration.paths import make_dlt_settings_path
from dlt.common.configuration.utils import serialize_value
from dlt.common.git import is_dirty
Expand Down Expand Up @@ -393,12 +393,7 @@ def _echo_instructions(self, *args: Optional[Any]) -> None:
f" {SECRETS_TOML_KEY} variable."
)
fmt.echo()
toml_provider = StringTomlProvider("")
for s_v in self.secret_envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
for s_v in self.envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
fmt.echo(toml_provider.dumps())
self._echo_secrets_toml()
else:
raise ValueError(self.secrets_format)

Expand Down
45 changes: 42 additions & 3 deletions dlt/cli/deploy_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import dlt

from dlt.common import git
from dlt.common.configuration.exceptions import LookupTrace
from dlt.common.configuration.exceptions import LookupTrace, ConfigFieldMissingException
from dlt.common.configuration.providers import ConfigTomlProvider, EnvironProvider
from dlt.common.configuration.providers.toml import BaseDocProvider, StringTomlProvider
from dlt.common.git import get_origin, get_repo, Repo
from dlt.common.configuration.specs.run_configuration import get_default_pipeline_name
from dlt.common.typing import StrAny
Expand Down Expand Up @@ -198,12 +199,50 @@ def _update_envs(self, trace: PipelineTrace) -> None:
# fmt.echo(f"{resolved_value.key} in {resolved_value.sections} moved to CONFIG")

def _echo_secrets(self) -> None:
display_info = False
for s_v in self.secret_envs:
fmt.secho("Name:", fg="green")
fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections)))
fmt.secho("Secret:", fg="green")
fmt.echo(s_v.value)
try:
fmt.secho("Secret:", fg="green")
fmt.echo(self._lookup_secret_value(s_v))
except ConfigFieldMissingException:
fmt.secho("please set me up!", fg="red")
display_info = True
fmt.echo()
if display_info:
self._display_missing_secret_info()
fmt.echo()

def _echo_secrets_toml(self) -> None:
display_info = False
toml_provider = StringTomlProvider("")
for s_v in self.secret_envs:
try:
secret_value = self._lookup_secret_value(s_v)
except ConfigFieldMissingException:
secret_value = "please set me up!"
display_info = True
toml_provider.set_value(s_v.key, secret_value, None, *s_v.sections)
for s_v in self.envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
fmt.echo(toml_provider.dumps())
if display_info:
self._display_missing_secret_info()
fmt.echo()

def _display_missing_secret_info(self) -> None:
fmt.warning(
"We could not read and display some secrets. Starting from 1.0 version of dlt,"
" those are not stored in the traces. Instead we are trying to read them from the"
" available configuration ie. secrets.toml file. Please run the deploy command from"
" the same working directory you ran your pipeline script. If you pass the"
" credentials in code we will not be able to display them here. See"
" https://dlthub.com/docs/general-usage/credentials"
)

def _lookup_secret_value(self, trace: LookupTrace) -> Any:
return dlt.secrets[BaseDocProvider.get_key_name(trace.key, *trace.sections)]

def _echo_envs(self) -> None:
for v in self.envs:
Expand Down
8 changes: 5 additions & 3 deletions dlt/cli/echo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ def always_choose(always_choose_default: bool, always_choose_value: Any) -> Iter
_always_choose_value = ALWAYS_CHOOSE_VALUE
ALWAYS_CHOOSE_DEFAULT = always_choose_default
ALWAYS_CHOOSE_VALUE = always_choose_value
yield
ALWAYS_CHOOSE_DEFAULT = _always_choose_default
ALWAYS_CHOOSE_VALUE = _always_choose_value
try:
yield
finally:
ALWAYS_CHOOSE_DEFAULT = _always_choose_default
ALWAYS_CHOOSE_VALUE = _always_choose_value


echo = click.echo
Expand Down
8 changes: 4 additions & 4 deletions dlt/pipeline/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import os
import pickle
import datetime # noqa: 251
from typing import Any, List, NamedTuple, Optional, Protocol, Sequence
from typing import Any, List, NamedTuple, Optional, Protocol, Sequence, Union
import humanize

from dlt.common.pendulum import pendulum
from dlt.common.configuration import is_secret_hint
from dlt.common.configuration.exceptions import ContextDefaultCannotBeCreated
from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
from dlt.common.configuration.utils import _RESOLVED_TRACES
from dlt.common.configuration.utils import _RESOLVED_TRACES, ResolvedValueTrace
from dlt.common.configuration.container import Container
from dlt.common.exceptions import ExceptionTrace, ResourceNameNotAvailable
from dlt.common.logger import suppress_and_warn
Expand Down Expand Up @@ -280,8 +280,8 @@ def end_trace_step(
resolved_values = map(
lambda v: SerializableResolvedValueTrace(
v.key,
v.value,
v.default_value,
None if is_secret_hint(v.hint) else v.value,
None if is_secret_hint(v.hint) else v.default_value,
is_secret_hint(v.hint),
v.sections,
v.provider_name,
Expand Down
52 changes: 29 additions & 23 deletions tests/cli/test_deploy_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from dlt.pipeline.exceptions import CannotRestorePipelineException
from dlt.cli.deploy_command_helpers import get_schedule_description

from tests.utils import TEST_STORAGE_ROOT, test_storage
from tests.utils import TEST_STORAGE_ROOT, reset_providers, test_storage


DEPLOY_PARAMS = [
Expand Down Expand Up @@ -134,28 +134,34 @@ def test_deploy_command(
test_storage.delete(".dlt/secrets.toml")
test_storage.atomic_rename(".dlt/secrets.toml.ci", ".dlt/secrets.toml")

# this time script will run
venv.run_script("debug_pipeline.py")
with echo.always_choose(False, always_choose_value=True):
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
deploy_command.deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
)
_out = buf.getvalue()
print(_out)
# make sure our secret and config values are all present
assert "api_key_9x3ehash" in _out
assert "dlt_data" in _out
if "schedule" in deployment_args:
assert get_schedule_description(deployment_args["schedule"])
secrets_format = deployment_args.get("secrets_format", "env")
if secrets_format == "env":
assert "API_KEY" in _out
else:
assert "api_key = " in _out
# reset toml providers to (1) CWD (2) non existing dir so API_KEY is not found
for project_dir, api_key in [
(None, "api_key_9x3ehash"),
(".", "please set me up!"),
]:
with reset_providers(project_dir=project_dir):
# this time script will run
venv.run_script("debug_pipeline.py")
with echo.always_choose(False, always_choose_value=True):
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
deploy_command.deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
)
_out = buf.getvalue()
print(_out)
# make sure our secret and config values are all present
assert api_key in _out
assert "dlt_data" in _out
if "schedule" in deployment_args:
assert get_schedule_description(deployment_args["schedule"])
secrets_format = deployment_args.get("secrets_format", "env")
if secrets_format == "env":
assert "API_KEY" in _out
else:
assert "api_key = " in _out

# non existing script name
with pytest.raises(NoSuchPathError):
Expand Down
17 changes: 3 additions & 14 deletions tests/common/configuration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,11 @@
from dlt.common.configuration import configspec
from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration
from dlt.common.configuration.container import Container
from dlt.common.configuration.providers import (
ConfigProvider,
EnvironProvider,
ConfigTomlProvider,
SecretsTomlProvider,
)
from dlt.common.configuration.providers import ConfigProvider, EnvironProvider
from dlt.common.configuration.utils import get_resolved_traces
from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext
from dlt.common.typing import TSecretValue, StrAny
from tests.utils import _reset_providers


@configspec
Expand Down Expand Up @@ -118,14 +114,7 @@ def env_provider() -> Iterator[ConfigProvider]:

@pytest.fixture
def toml_providers() -> Iterator[ConfigProvidersContext]:
pipeline_root = "./tests/common/cases/configuration/.dlt"
ctx = ConfigProvidersContext()
ctx.providers.clear()
ctx.add_provider(EnvironProvider())
ctx.add_provider(SecretsTomlProvider(project_dir=pipeline_root))
ctx.add_provider(ConfigTomlProvider(project_dir=pipeline_root))
with Container().injectable_context(ctx):
yield ctx
yield from _reset_providers("./tests/common/cases/configuration/.dlt")


class MockProvider(ConfigProvider):
Expand Down
9 changes: 5 additions & 4 deletions tests/pipeline/test_pipeline_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,18 @@ def data():
assert resolved.is_secret_hint is False
assert resolved.default_value is None
assert resolved.provider_name == "config.toml"
# dictionaries are not returned anymore
# dictionaries are not returned anymore, secrets are masked
resolved = _find_resolved_value(trace.resolved_config_values, "credentials", [])
assert resolved is None or isinstance(resolved.value, str)
resolved = _find_resolved_value(trace.resolved_config_values, "secret_value", [])
assert resolved.is_secret_hint is True
assert resolved.value == "2137"
assert resolved.default_value == "123"
assert resolved.value is None, "Credential is not masked"
assert resolved.default_value is None, "Credential is not masked"
resolved = _find_resolved_value(trace.resolved_config_values, "credentials", ["databricks"])
assert resolved.is_secret_hint is True
assert resolved.value == databricks_creds
assert resolved.value is None, "Credential is not masked"
assert_trace_serializable(trace)

# activate pipeline because other was running in assert trace
p.activate()

Expand Down
24 changes: 23 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import multiprocessing
import os
import platform
Expand All @@ -12,7 +13,12 @@

import dlt
from dlt.common.configuration.container import Container
from dlt.common.configuration.providers import DictionaryProvider
from dlt.common.configuration.providers import (
DictionaryProvider,
EnvironProvider,
SecretsTomlProvider,
ConfigTomlProvider,
)
from dlt.common.configuration.resolve import resolve_configuration
from dlt.common.configuration.specs import RunConfiguration
from dlt.common.configuration.specs.config_providers_context import (
Expand Down Expand Up @@ -382,3 +388,19 @@ def assert_query_data(
# the second is load id
if info:
assert row[1] in info.loads_ids


@contextlib.contextmanager
def reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]:
"""Context manager injecting standard set of providers where toml providers are initialized from `project_dir`"""
return _reset_providers(project_dir)


def _reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]:
ctx = ConfigProvidersContext()
ctx.providers.clear()
ctx.add_provider(EnvironProvider())
ctx.add_provider(SecretsTomlProvider(project_dir=project_dir))
ctx.add_provider(ConfigTomlProvider(project_dir=project_dir))
with Container().injectable_context(ctx):
yield ctx

0 comments on commit 1714801

Please sign in to comment.