Skip to content

Commit

Permalink
feat(dbt): load dbt scaffold using package_data (#15502)
Browse files Browse the repository at this point in the history
## Summary & Motivation
Allow `dagster-dbt project scaffold` to load dbt projects from their
path in `package_data`. This will be invoked in the dbt NUX.

We hardcode `dbt-project` as the dbt project directory name for the
`package_data`. This constraint could be relaxed in the future.

## How I Tested These Changes
- pytest
- The dbt NUX creates a pull request that builds to a branch deployment:
https://github.com/dagster-io/test-dagster-dbt-scaffold/pull/53
- GitHub Actions that build a Dagster package utilizing `package_data`:
dagster-io/dagster-cloud-action#147
  • Loading branch information
rexledesma authored and benpankow committed Jul 27, 2023
1 parent 23f9af1 commit 257fd92
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 4 deletions.
15 changes: 15 additions & 0 deletions python_modules/libraries/dagster-dbt/dagster_dbt/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def copy_scaffold(
project_name: str,
dagster_project_dir: Path,
dbt_project_dir: Path,
use_dbt_project_package_data_dir: bool,
) -> None:
shutil.copytree(src=STARTER_PROJECT_PATH, dst=dagster_project_dir)
dagster_project_dir.joinpath("__init__.py").unlink()
Expand All @@ -90,6 +91,9 @@ def copy_scaffold(
for target in profile["outputs"].values()
]

if use_dbt_project_package_data_dir:
dbt_project_dir = dagster_project_dir.joinpath("dbt-project")

dbt_project_dir_relative_path = Path(
os.path.relpath(
dbt_project_dir,
Expand Down Expand Up @@ -119,6 +123,7 @@ def copy_scaffold(
dbt_assets_name=f"{dbt_project_name}_dbt_assets",
dbt_adapter_packages=dbt_adapter_packages,
project_name=project_name,
use_dbt_project_package_data_dir=use_dbt_project_package_data_dir,
).dump(destination_path)

path.unlink()
Expand Down Expand Up @@ -157,6 +162,15 @@ def project_scaffold_command(
resolve_path=True,
),
] = Path.cwd(),
use_dbt_project_package_data_dir: Annotated[
bool,
typer.Option(
default=...,
help="Controls whether the dbt project package data directory is used.",
is_flag=True,
hidden=True,
),
] = False,
) -> None:
"""This command will initialize a new Dagster project and create directories and files that
load assets from an existing dbt project.
Expand All @@ -176,6 +190,7 @@ def project_scaffold_command(
project_name=project_name,
dagster_project_dir=dagster_project_dir,
dbt_project_dir=dbt_project_dir,
use_dbt_project_package_data_dir=use_dbt_project_package_data_dir,
)

console.print(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ from pathlib import Path
from dagster import Definitions, OpExecutionContext
from dagster_dbt import DbtCliResource, build_schedule_from_dbt_selection, dbt_assets

{% if use_dbt_project_package_data_dir -%}
# We expect the dbt project to be installed as package data.
# For details, see https://docs.python.org/3/distutils/setupscript.html#installing-package-data.
{%- endif %}
dbt_project_dir = Path(__file__).parent.joinpath({{ dbt_project_dir_relative_path_parts | join(', ')}})
dbt = DbtCliResource(project_dir=os.fspath(dbt_project_dir))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ setup(
name="{{ project_name }}",
version="0.0.1",
packages=find_packages(),
{% if use_dbt_project_package_data_dir -%}
package_data={
"{{ project_name }}": [
"dbt-project/**/*",
],
},
{%- endif %}
install_requires=[
"dagster",
"dagster-cloud",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@ def dbt_project_dir_fixture(tmp_path: Path) -> Path:
return dbt_project_dir


@pytest.mark.parametrize("use_dbt_project_package_data_dir", [True, False])
def test_project_scaffold_command_with_precompiled_manifest(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path, dbt_project_dir: Path
monkeypatch: pytest.MonkeyPatch,
tmp_path: Path,
dbt_project_dir: Path,
use_dbt_project_package_data_dir: bool,
) -> None:
monkeypatch.chdir(tmp_path)

project_name = "test_dagster_scaffold"
project_name = f"test_dagster_scaffold_{use_dbt_project_package_data_dir}"
dagster_project_dir = tmp_path.joinpath(project_name)

result = runner.invoke(
Expand All @@ -50,6 +54,7 @@ def test_project_scaffold_command_with_precompiled_manifest(
project_name,
"--dbt-project-dir",
os.fspath(dbt_project_dir),
*(["--use-dbt-project-package-data-dir"] if use_dbt_project_package_data_dir else []),
],
)

Expand All @@ -61,6 +66,13 @@ def test_project_scaffold_command_with_precompiled_manifest(
assert not any(path.suffix == ".jinja" for path in dagster_project_dir.glob("**/*"))
assert "dbt-duckdb" in dagster_project_dir.joinpath("setup.py").read_text()

if use_dbt_project_package_data_dir:
dbt_project_dir = dagster_project_dir.joinpath("dbt-project")
shutil.copytree(
src=test_dagster_metadata_dbt_project_path,
dst=dbt_project_dir,
)

subprocess.run(["dbt", "compile"], cwd=dbt_project_dir, check=True)

assert dbt_project_dir.joinpath("target", "manifest.json").exists()
Expand All @@ -82,12 +94,16 @@ def test_project_scaffold_command_with_precompiled_manifest(
assert materialize_dbt_models_schedule.cron_schedule == "0 0 * * *"


@pytest.mark.parametrize("use_dbt_project_package_data_dir", [True, False])
def test_project_scaffold_command_with_runtime_manifest(
monkeypatch: pytest.MonkeyPatch, tmp_path: Path, dbt_project_dir: Path
monkeypatch: pytest.MonkeyPatch,
tmp_path: Path,
dbt_project_dir: Path,
use_dbt_project_package_data_dir: bool,
) -> None:
monkeypatch.chdir(tmp_path)

project_name = "test_dagster_scaffold_runtime_manifest"
project_name = f"test_dagster_scaffold_runtime_manifest_{use_dbt_project_package_data_dir}"
dagster_project_dir = tmp_path.joinpath(project_name)

result = runner.invoke(
Expand All @@ -99,6 +115,7 @@ def test_project_scaffold_command_with_runtime_manifest(
project_name,
"--dbt-project-dir",
os.fspath(dbt_project_dir),
*(["--use-dbt-project-package-data-dir"] if use_dbt_project_package_data_dir else []),
],
)

Expand All @@ -111,6 +128,13 @@ def test_project_scaffold_command_with_runtime_manifest(
assert not dbt_project_dir.joinpath("target", "manifest.json").exists()
assert "dbt-duckdb" in dagster_project_dir.joinpath("setup.py").read_text()

if use_dbt_project_package_data_dir:
dbt_project_dir = dagster_project_dir.joinpath("dbt-project")
shutil.copytree(
src=test_dagster_metadata_dbt_project_path,
dst=dbt_project_dir,
)

monkeypatch.chdir(tmp_path)
sys.path.append(os.fspath(tmp_path))

Expand Down

0 comments on commit 257fd92

Please sign in to comment.