Skip to content

Commit

Permalink
Merge pull request #157 from brooklyn-data/feat/migration-to-v1-script
Browse files Browse the repository at this point in the history
Adding migration macro
  • Loading branch information
NiallRees authored Aug 4, 2022
2 parents 705c432 + a1a2209 commit 54e96ef
Show file tree
Hide file tree
Showing 2 changed files with 371 additions and 0 deletions.
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,29 @@ vars:
]
```

## Migrating From <1.0.0 to >=1.0.0
To migrate your existing data from `dbt-artifacts` versions <=0.8.0, a helper macro and guide are provided. This migration uses the data in the old `fct_*` and `dim_*` models to populate the new sources. The steps to use the macro are as follows:

1. If not already completed, run `dbt run-operation create_dbt_artifacts_tables` to make your source tables.
2. Run `dbt run-operation migrate_from_v0_to_v1 --args '<see-below-for-arguments>'`.
3. Verify that the migration completes successfully.
4. Manually delete any database objects (sources, staging models, tables/views) from the previous `dbt-artifacts` version.

The arguments for `migrate_from_v0_to_v1` are as follows:
| argument | description |
|-------------- |---------------------------------------------------------- |
| `old_database` | the database of the <1.0.0 output (`fct_`/`dim_`) models |
| `old_schema` | the schema of the <1.0.0 output (`fct_`/`dim_`) models |
| `new_database` | the target database that the artifact sources are in |
| `new_schema` | the target schema that the artifact sources are in |

The old and new databases/schemas *do not* have to be different; they are passed as separate arguments so that either setup is supported.

An example operation is as follows:
```bash
dbt run-operation migrate_from_v0_to_v1 --args '{old_database: analytics, old_schema: dbt_artifacts, new_database: analytics, new_schema: artifact_sources}'
```

## Acknowledgements
Thank you to [Tails.com](https://tails.com/gb/careers/) for initial development and maintenance of this package. On 2021/12/20, the repository was transferred from the Tails.com GitHub organization to Brooklyn Data Co.

Expand Down
348 changes: 348 additions & 0 deletions macros/migrate_from_v0_to_v1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,348 @@
{%- macro migrate_from_v0_to_v1(old_database, old_schema, new_database, new_schema) -%}

    {#
        Copies rows from the pre-1.0.0 output models (fct_dbt__* / dim_dbt__*)
        into the 1.0.0+ source tables, one `insert ... select` per table.

        Arguments:
            old_database / old_schema: where the fct_dbt__* / dim_dbt__* relations are read from.
            new_database / new_schema: where the target tables are written to.

        Each statement is announced with an info-level log line and then run
        through dbt's `statement` block with auto_begin=True. Target columns
        that are not read from the old relation are filled with a constant
        (`null`, `[]`, a literal string, or a parsed JSON default).
        Nothing here checks for rows that were migrated previously.
    #}

    {%- set legacy = old_database ~ "." ~ old_schema -%}
    {%- set target = new_database ~ "." ~ new_schema -%}

    {#- model_executions <- fct_dbt__model_executions -#}
    {%- set model_executions_sql %}
        insert into {{ target }}.model_executions (
            command_invocation_id,
            compile_started_at,
            materialization,
            name,
            node_id,
            query_completed_at,
            rows_affected,
            schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            run_started_at
        )
        select
            command_invocation_id,
            compile_started_at,
            model_materialization,
            name,
            node_id,
            query_completed_at,
            rows_affected,
            model_schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            artifact_generated_at
        from {{ legacy }}.fct_dbt__model_executions
    {% endset -%}

    {#- tests <- dim_dbt__tests; `tags` is written as an empty array -#}
    {%- set tests_sql %}
        insert into {{ target }}.tests (
            command_invocation_id,
            depends_on_nodes,
            name,
            node_id,
            package_name,
            tags,
            test_path,
            run_started_at
        )
        select
            command_invocation_id,
            depends_on_nodes,
            name,
            node_id,
            package_name,
            [],
            test_path,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__tests
    {% endset -%}

    {#- test_executions <- fct_dbt__test_executions; `failures` is written as null -#}
    {%- set test_executions_sql %}
        insert into {{ target }}.test_executions (
            command_invocation_id,
            compile_started_at,
            failures,
            node_id,
            query_completed_at,
            rows_affected,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            run_started_at
        )
        select
            command_invocation_id,
            compile_started_at,
            null,
            node_id,
            query_completed_at,
            rows_affected,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            artifact_generated_at
        from {{ legacy }}.fct_dbt__test_executions
    {% endset -%}

    {#- models <- dim_dbt__models -#}
    {%- set models_sql %}
        insert into {{ target }}.models (
            checksum,
            command_invocation_id,
            database,
            depends_on_nodes,
            materialization,
            name,
            node_id,
            package_name,
            path,
            schema,
            run_started_at
        )
        select
            checksum,
            command_invocation_id,
            model_database,
            depends_on_nodes,
            model_materialization,
            name,
            node_id,
            package_name,
            model_path,
            model_schema,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__models
    {% endset -%}

    {#- seeds <- dim_dbt__seeds -#}
    {%- set seeds_sql %}
        insert into {{ target }}.seeds (
            checksum,
            command_invocation_id,
            database,
            name,
            node_id,
            package_name,
            path,
            schema,
            run_started_at
        )
        select
            checksum,
            command_invocation_id,
            seed_database,
            name,
            node_id,
            package_name,
            seed_path,
            seed_schema,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__seeds
    {% endset -%}

    {#- seed_executions <- fct_dbt__seed_executions; `materialization` is the literal 'seed' -#}
    {%- set seed_executions_sql %}
        insert into {{ target }}.seed_executions (
            command_invocation_id,
            compile_started_at,
            materialization,
            name,
            node_id,
            query_completed_at,
            rows_affected,
            schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            run_started_at
        )
        select
            command_invocation_id,
            compile_started_at,
            'seed',
            name,
            node_id,
            query_completed_at,
            rows_affected,
            seed_schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            artifact_generated_at
        from {{ legacy }}.fct_dbt__seed_executions
    {% endset -%}

    {#-
        exposures <- dim_dbt__exposures
        depends_on_nodes is written as an empty array; description, owner,
        path and url are written as null. owner is nulled because v0 stores
        it as a string while v1 stores it as a variant.
    -#}
    {%- set exposures_sql %}
        insert into {{ target }}.exposures (
            command_invocation_id,
            depends_on_nodes,
            description,
            maturity,
            name,
            node_id,
            owner,
            package_name,
            path,
            type,
            url,
            run_started_at
        )
        select
            command_invocation_id,
            [],
            null,
            maturity,
            name,
            node_id,
            null,
            package_name,
            null,
            type,
            null,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__exposures
    {% endset -%}

    {#- snapshots <- dim_dbt__snapshots; `strategy` is written as null -#}
    {%- set snapshots_sql %}
        insert into {{ target }}.snapshots (
            checksum,
            command_invocation_id,
            database,
            depends_on_nodes,
            name,
            node_id,
            package_name,
            path,
            schema,
            strategy,
            run_started_at
        )
        select
            checksum,
            command_invocation_id,
            snapshot_database,
            depends_on_nodes,
            name,
            node_id,
            package_name,
            snapshot_path,
            snapshot_schema,
            null,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__snapshots
    {% endset -%}

    {#- snapshot_executions <- fct_dbt__snapshot_executions; `materialization` is the literal 'snapshot' -#}
    {%- set snapshot_executions_sql %}
        insert into {{ target }}.snapshot_executions (
            command_invocation_id,
            compile_started_at,
            materialization,
            name,
            node_id,
            query_completed_at,
            rows_affected,
            schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            run_started_at
        )
        select
            command_invocation_id,
            compile_started_at,
            'snapshot',
            name,
            node_id,
            query_completed_at,
            rows_affected,
            snapshot_schema,
            status,
            thread_id,
            total_node_runtime,
            was_full_refresh,
            artifact_generated_at
        from {{ legacy }}.fct_dbt__snapshot_executions
    {% endset -%}

    {#-
        sources <- dim_dbt__sources
        freshness is a fixed JSON document with every threshold null,
        identifier is filled from the old `name` column, and
        loaded_at_field is written as null.
    -#}
    {%- set sources_sql %}
        insert into {{ target }}.sources (
            command_invocation_id,
            database,
            freshness,
            identifier,
            loaded_at_field,
            loader,
            name,
            node_id,
            schema,
            source_name,
            run_started_at
        )
        select
            command_invocation_id,
            node_database,
            parse_json('[{"error_after":{"count":null,"period":null},"filter":null,"warn_after":{"count":null,"period":null}}]'),
            name,
            null,
            source_loader,
            name,
            node_id,
            source_schema,
            source_name,
            artifact_generated_at
        from {{ legacy }}.dim_dbt__sources
    {% endset -%}

    {#- (table label, insert statement) pairs, in the order they are executed -#}
    {%- set migration_steps = [
        ("model_executions", model_executions_sql),
        ("tests", tests_sql),
        ("test_executions", test_executions_sql),
        ("models", models_sql),
        ("seeds", seeds_sql),
        ("seed_executions", seed_executions_sql),
        ("exposures", exposures_sql),
        ("snapshots", snapshots_sql),
        ("snapshot_executions", snapshot_executions_sql),
        ("sources", sources_sql)
    ] -%}

    {#- Announce, then run, each insert in turn -#}
    {%- for table_label, insert_sql in migration_steps -%}
        {%- do log("Migrating " ~ table_label, info=True) -%}
        {%- call statement(auto_begin=True) -%}
            {{ insert_sql }}
        {%- endcall -%}
    {%- endfor -%}

    {%- do log("Migration complete. You can now safely delete any data from before 1.0.0", info=True) -%}

{%- endmacro -%}

0 comments on commit 54e96ef

Please sign in to comment.