[DC-3557] Update combined and curation_dashboard queries (#1800)

* [DC-3557] Update combined and curation_dashboard queries
* [DC-3557] Remove default_datasets import from curation_dashboard.py
all-of-us · Nov 6, 2023 · 39e85f5 · 39e85f5
1 parent a52adb5
commit 39e85f5
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 7 deletions.
diff --git a/data_steward/analytics/cdr_ops/combined.py b/data_steward/analytics/cdr_ops/combined.py
@@ -122,18 +122,18 @@
 SELECT
  "{{table_name}}"     AS table_name
 ,"{{date_field}}"     AS date_field
-,t.{{date_field}}     AS date_value
+,DATE(t.{{date_field}})     AS date_value
 ,p.birth_datetime     AS birth_datetime
 FROM `{{dataset_id}}.{{table_name}}` t
  JOIN `{{dataset_id}}.person` p
   USING (person_id)
 WHERE
 (
  -- age <= 0y --
- t.{{date_field}} < DATE(p.birth_datetime)
+ DATE(t.{{date_field}}) < DATE(p.birth_datetime)
 
  -- age >= 150y --
- OR pipeline_tables.calculate_age(t.{{date_field}}, EXTRACT(DATE FROM p.birth_datetime)) >= 150
+ OR pipeline_tables.calculate_age(DATE(t.{{date_field}}), EXTRACT(DATE FROM p.birth_datetime)) >= 150
 )
 AND
 p.birth_datetime IS NOT NULL
@@ -256,7 +256,7 @@
       || 'USING (' || table_name ||'_id) '
       || 'LEFT JOIN consented c '
       || ' USING (person_id)'
-      || 'WHERE m.src_hpo_id <> "rdr" AND c.person_id IS NULL)'
+      || 'WHERE m.src_hpo_id NOT IN (\\"ce\\", \\"vibrent\\", \\"healthpro\\") AND c.person_id IS NULL)'
    , ' UNION ALL ')
  FROM `{{DATASET_ID}}.INFORMATION_SCHEMA.COLUMNS` c
  JOIN `{{DATASET_ID}}.__TABLES__` t

diff --git a/data_steward/analytics/cdr_ops/curation_dashboard.py b/data_steward/analytics/cdr_ops/curation_dashboard.py
@@ -20,7 +20,6 @@
 import seaborn as sns
 
 from common import PIPELINE_TABLES
-# from notebooks.defaults import DEFAULT_DATASETS
 from utils import bq
 
 warnings.filterwarnings('ignore')
@@ -87,7 +86,7 @@ def row_counts(dataset_ids):
 combined_df.to_csv('%s.csv' % 'combined_diff')
 
 ct_df = row_counts(ALL_CT_DATASET + [CT_DATASET])
-ct_df = combined_df.pivot(index='table_id',
+ct_df = ct_df.pivot(index='table_id',
                                 columns='dataset_id',
                                 values='row_count')
 ct_df.to_csv('%s.csv' % 'ct_diff')
@@ -207,7 +206,7 @@ def gender_by_race(dataset_id):
     ''')
     df['race'] = df['race'].astype('category')
     df['gender'] = df['gender'].astype('category')
-    g = sns.FacetGrid(df, col='race', hue='gender', col_wrap=5)
+    g = sns.FacetGrid(df, col='race', sharey=False, hue='gender', col_wrap=5)
     g.map(sns.barplot, 'gender', 'count', ci=None)
     g.set_xticklabels([])
     g.set_axis_labels('', '')