forked from joellabes/dbt-manifest-differ
-
Notifications
You must be signed in to change notification settings - Fork 0
/
differ.py
149 lines (118 loc) · 6.13 KB
/
differ.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import streamlit as st
from streamlit.runtime.uploaded_file_manager import UploadedFile
import json
import jsondiff
import pandas as pd
from functions.flatten import flatten_keys
from functions import tidy
# Minimal viable imports from dbt-core
from dbt.contracts.graph.manifest import WritableManifest
from dbt.graph.selector_methods import StateSelectorMethod
class MockPreviousState:
    """Bare-bones previous-state holder passed to ``StateSelectorMethod``.

    It only carries a ``manifest`` attribute; this script builds one around
    the right-hand manifest and hands it to the state selector.
    """

    def __init__(self, manifest: WritableManifest) -> None:
        """Store *manifest* so it can be read back as ``self.manifest``."""
        # Annotated with the parameter's own type: the previous annotation
        # named `Manifest`, which is never imported in this file.
        self.manifest: WritableManifest = manifest
# Page chrome: wide layout, a title, and a short usage note for the reader.
st.set_page_config(layout="wide")
st.title("dbt Manifest Differ")

# Markdown body of the info box shown under the title.
usage_note = """Work out why your models built in a Slim CI run.
Upload your production manifest on the left side, and the manifest from your Slim CI run on the right side.
False positives in `modified.configs` are likely to be due to `config()` blocks in a node's definition or in its `.yml` resource file.
To avoid false positives, define configs in `dbt_project.yml` instead. See [the docs on state comparison](https://docs.getdbt.com/reference/node-selection/state-comparison-caveats#false-positives) for more information.
"""
st.info(usage_note, icon="💡")
# One column per manifest, side by side.
left_col, right_col = st.columns(2)

# Parsed manifests; both remain None until the matching upload is loaded.
left_manifest: WritableManifest = None
right_manifest: WritableManifest = None

# Copy-paste from https://github.com/dbt-labs/dbt-core/blob/0ab954e1af9bb2be01fa4ebad2df7626249a1fab/core/dbt/graph/selector_methods.py#L676
state_options = [
    "modified",
    "new",
    "modified.body",
    "modified.configs",
    "modified.persisted_descriptions",
    "modified.relation",
    "modified.macros",
    "modified.contract",
]
state_method = st.selectbox(
    label="State comparison method:",
    options=state_options,
)

# Node properties the user may hide from the per-node diff, and the subset
# that is hidden unless the user changes the selection.
ignorable_properties = [
    'created_at',
    'root_path',
    'build_path',
    'compiled_path',
    'deferred',
    'schema',
    'checksum',
    'compiled_code',
    'database',
    'relation_name',
]
ignored_by_default = [
    'created_at',
    'checksum',
    'database',
    'schema',
    'relation_name',
    'compiled_path',
    'root_path',
    'build_path',
]
properties_to_ignore = st.multiselect(
    "Properties to ignore when showing node-level diffs:",
    ignorable_properties,
    default=ignored_by_default,
)

# Filled in by load_manifest() with whatever tidy.remove_large_seeds reports.
skipped_large_seeds = set()
def load_manifest(file: UploadedFile) -> WritableManifest:
    """Turn an uploaded manifest JSON file into a ``WritableManifest``.

    The JSON payload is read from *file*, passed through
    ``tidy.remove_large_seeds``, and the trimmed payload is handed to
    ``WritableManifest.upgrade_schema_version``.

    Side effect: everything ``remove_large_seeds`` reports as a large seed is
    added to the module-level ``skipped_large_seeds`` set.

    Args:
        file: The uploaded manifest, readable by ``json.load``.

    Returns:
        Whatever ``WritableManifest.upgrade_schema_version`` produces for the
        trimmed payload.
    """
    raw_manifest = json.load(file)
    trimmed_manifest, oversized_seeds = tidy.remove_large_seeds(raw_manifest)
    # Remember the skipped seeds so the page can report them later.
    skipped_large_seeds.update(oversized_seeds)
    upgraded_manifest = WritableManifest.upgrade_schema_version(trimmed_manifest)
    return upgraded_manifest
# Left-hand uploader; the manifest is parsed as soon as a file is present.
left_file = left_col.file_uploader(
    "First manifest",
    type="json",
    help="Pick your left json file",
)
if left_file is not None:
    left_manifest = load_manifest(left_file)

# Right-hand uploader, handled the same way.
right_file = right_col.file_uploader(
    "Second manifest",
    type="json",
    help="Pick your right json file",
)
if right_file is not None:
    right_manifest = load_manifest(right_file)
# Main view: once both manifests are uploaded, run every state selector,
# chart how many nodes each one matches, then show a per-node diff for the
# nodes matched by the selector chosen in the select box.
if left_file and right_file:
    # TODO: also calculate diffs for sources, exposures, semantic_models, metrics
    included_nodes = set(left_manifest.nodes.keys())
    # NOTE(review): the left manifest is used as the manifest under test and
    # the right one as the previous state, while the intro text asks for the
    # production manifest on the left -- confirm this direction is intended.
    previous_state = MockPreviousState(right_manifest)
    state_comparator = StateSelectorMethod(left_manifest, previous_state, "")

    if skipped_large_seeds:
        st.warning(f"Some large seeds couldn't be compared from the manifest alone: {skipped_large_seeds}" )

    # Run each selector exactly once. The results are kept so the selector
    # picked by the user does not have to be searched a second time.
    results_by_option = {}
    state_inclusion_counts = {}
    state_inclusion_reasons_by_node = {}
    for state_option in state_options:
        results = list(state_comparator.search(included_nodes, state_option))
        results_by_option[state_option] = results
        for node in results:
            state_inclusion_reasons_by_node.setdefault(node, []).append(state_option)
        state_inclusion_counts[state_option] = len(results)

    st.bar_chart(state_inclusion_counts)

    # state_method comes from a select box over state_options, so its
    # results are already in the cache built above.
    selected_nodes = results_by_option[state_method]

    if state_comparator.modified_macros:
        st.header("Modified macros")
        st.write(state_comparator.modified_macros)

    st.header(f"{len(selected_nodes)} Selected node{'s' if len(selected_nodes) != 1 else ''}")

    # Built once: membership is tested for every key of every selected node.
    ignored_properties = set(properties_to_ignore)

    for unique_id in selected_nodes:
        left_node = left_manifest.nodes.get(unique_id)
        right_node = right_manifest.nodes.get(unique_id)
        st.subheader(unique_id)

        if left_node and right_node:
            left_dict = left_node.to_dict()
            right_dict = right_node.to_dict()
            all_keys = set(left_dict.keys()) | set(right_dict.keys())
            # Only keys that are not ignored and that differ (or exist on one
            # side only) are diffed. Keys are sorted so the tree and table
            # below are rendered in a stable order.
            diffs = {
                k: jsondiff.diff(
                    left_dict.get(k, None),
                    right_dict.get(k, None),
                    syntax='symmetric',
                    marshal=True
                )
                for k in sorted(all_keys)
                if k not in ignored_properties and (
                    k not in right_dict
                    or k not in left_dict
                    or left_dict[k] != right_dict[k]
                )
            }

            st.write("##### State selectors that find this node:")
            st.code(state_inclusion_reasons_by_node[unique_id])

            if left_node.depends_on.macros and state_comparator.modified_macros:
                st.write(f"Depends on macros: {left_node.depends_on.macros}")

            diff_json, right_full_json = st.columns(2)
            diff_json.write("##### JSON tree of diffs:")
            diff_json.json(diffs, expanded=False)
            right_full_json.write("##### JSON tree of all elements in right node:")
            right_full_json.json(right_dict, expanded=False)

            st.write("##### Flat table of diffs:")
            # Best effort: if the table cannot be built, the error is shown
            # in place of the table and the rest of the page still renders.
            try:
                flattened_diff = flatten_keys(diffs)
                df = pd.DataFrame.from_dict(flattened_diff, orient='index')
                st.dataframe(df, use_container_width=True)
            except Exception as e:
                st.error(f"Couldn't print as table: {e}")
        elif not left_node:
            # NOTE(review): selected ids are searched from the left manifest's
            # own node keys, so this branch looks unreachable -- confirm.
            st.warning("Missing from left manifest (brand new node)")
        elif not right_node:
            st.warning("Missing from right manifest (deleted node)")
            st.write("State methods that pick this node up:")
            st.code(state_inclusion_reasons_by_node[unique_id])
        st.divider()
else:
    st.warning("Upload two manifests to begin comparison", icon="👯")