Skip to content

Commit

Permalink
[DC-3557] Update combined and curation_dashboard queries (#1800)
Browse files Browse the repository at this point in the history
* [DC-3557] Update combined and curation_dashboard queries

* [DC-3557] Remove default_datasets import from curation_dashboard.py
  • Loading branch information
brendagutman authored Nov 6, 2023
1 parent a52adb5 commit 39e85f5
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
8 changes: 4 additions & 4 deletions data_steward/analytics/cdr_ops/combined.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,18 +122,18 @@
SELECT
"{{table_name}}" AS table_name
,"{{date_field}}" AS date_field
-,t.{{date_field}} AS date_value
+,DATE(t.{{date_field}}) AS date_value
,p.birth_datetime AS birth_datetime
FROM `{{dataset_id}}.{{table_name}}` t
JOIN `{{dataset_id}}.person` p
USING (person_id)
WHERE
(
-- age <= 0y --
-t.{{date_field}} < DATE(p.birth_datetime)
+DATE(t.{{date_field}}) < DATE(p.birth_datetime)
-- age >= 150y --
-OR pipeline_tables.calculate_age(t.{{date_field}}, EXTRACT(DATE FROM p.birth_datetime)) >= 150
+OR pipeline_tables.calculate_age(DATE(t.{{date_field}}), EXTRACT(DATE FROM p.birth_datetime)) >= 150
)
AND
p.birth_datetime IS NOT NULL
Expand Down Expand Up @@ -256,7 +256,7 @@
|| 'USING (' || table_name ||'_id) '
|| 'LEFT JOIN consented c '
|| ' USING (person_id)'
-|| 'WHERE m.src_hpo_id <> "rdr" AND c.person_id IS NULL)'
+|| 'WHERE m.src_hpo_id NOT IN (\\"ce\\", \\"vibrent\\", \\"healthpro\\") AND c.person_id IS NULL)'
, ' UNION ALL ')
FROM `{{DATASET_ID}}.INFORMATION_SCHEMA.COLUMNS` c
JOIN `{{DATASET_ID}}.__TABLES__` t
Expand Down
5 changes: 2 additions & 3 deletions data_steward/analytics/cdr_ops/curation_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import seaborn as sns

from common import PIPELINE_TABLES
-# from notebooks.defaults import DEFAULT_DATASETS
from utils import bq

warnings.filterwarnings('ignore')
Expand Down Expand Up @@ -87,7 +86,7 @@ def row_counts(dataset_ids):
combined_df.to_csv('%s.csv' % 'combined_diff')

ct_df = row_counts(ALL_CT_DATASET + [CT_DATASET])
-ct_df = combined_df.pivot(index='table_id',
+ct_df = ct_df.pivot(index='table_id',
columns='dataset_id',
values='row_count')
ct_df.to_csv('%s.csv' % 'ct_diff')
Expand Down Expand Up @@ -207,7 +206,7 @@ def gender_by_race(dataset_id):
''')
df['race'] = df['race'].astype('category')
df['gender'] = df['gender'].astype('category')
-g = sns.FacetGrid(df, col='race', hue='gender', col_wrap=5)
+g = sns.FacetGrid(df, col='race', sharey=False, hue='gender', col_wrap=5)
g.map(sns.barplot, 'gender', 'count', ci=None)
g.set_xticklabels([])
g.set_axis_labels('', '')
Expand Down

0 comments on commit 39e85f5

Please sign in to comment.