Skip to content

Commit

Permalink
[DC-3271] Remove irrelevant bq_utils import statements (#1841)
Browse files Browse the repository at this point in the history
* [DC-3632] Remove five cases of unused imports

* [DC-3632] Update 5 more usages of imports, some formatting occurred

* [DC-3632] remove 5 more usages of imports, some formatting occurred

* [DC-3632] remove 5 more usages of imports, some formatting occurred

* [DC-3632] remove final 2 occurrences, some formatting occurred

* [DC-3632] Add required import back
  • Loading branch information
Michael Schmidt committed Jan 22, 2024
1 parent dbec3b7 commit 9961da6
Show file tree
Hide file tree
Showing 21 changed files with 311 additions and 331 deletions.
25 changes: 12 additions & 13 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# # Person
# ## Person ID validation

import bq_utils
import utils.bq
from notebooks.parameters import RDR_DATASET_ID, EHR_DATASET_ID

Expand All @@ -13,7 +12,7 @@
hpo_ids = utils.bq.query("""
SELECT REPLACE(table_id, '_person', '') AS hpo_id
FROM `{EHR_DATASET_ID}.__TABLES__`
WHERE table_id LIKE '%person'
WHERE table_id LIKE '%person'
AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%'
""".format(EHR_DATASET_ID=EHR_DATASET_ID)).hpo_id.tolist()

Expand All @@ -31,7 +30,7 @@
(SELECT COUNT(1) AS n
FROM {EHR_DATASET_ID}.{h}_person e
WHERE NOT EXISTS(
SELECT 1
SELECT 1
FROM {RDR_DATASET_ID}.person r
WHERE r.person_id = e.person_id)) not_in_rdr
ON TRUE
Expand Down Expand Up @@ -63,31 +62,31 @@
RDR_EHR_NAME_MATCH_QUERY = '''
WITH
rdr_first_name AS
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val
FROM {RDR_DATASET_ID}.observation
WHERE observation_source_value = 'PIIName_First'),
rdr_last_name AS
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
(SELECT DISTINCT person_id,
FIRST_VALUE(value_as_string)
OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val
FROM {RDR_DATASET_ID}.observation
WHERE observation_source_value = 'PIIName_Last'),
rdr_name AS
(SELECT
(SELECT
f.person_id person_id,
f.val first_name,
f.val first_name,
l.val last_name
FROM rdr_first_name f JOIN rdr_last_name l USING (person_id))
SELECT
'{HPO_ID}' hpo_id,
rdr.person_id rdr_person_id,
rdr.first_name rdr_first_name,
rdr.last_name rdr_last_name,
rdr.person_id rdr_person_id,
rdr.first_name rdr_first_name,
rdr.last_name rdr_last_name,
pii.person_id pii_person_id,
pii.first_name pii_first_name,
pii.middle_name pii_middle_name,
Expand All @@ -97,7 +96,7 @@
FROM rdr_name rdr
JOIN `{EHR_DATASET_ID}.{HPO_ID}_pii_name` pii
ON pii.first_name = rdr.first_name
AND pii.last_name = rdr.last_name
AND pii.last_name = rdr.last_name
LEFT JOIN `{EHR_DATASET_ID}.{HPO_ID}_person` p
ON pii.person_id = p.person_id
'''
Expand Down
29 changes: 14 additions & 15 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
# - Record count (condition_occurrence)
#
# - We want to determine if these fluctuations are potentially caused by OMOP vocabulary issues. If this is the case, we should be able to determine similar trends in AoU data.
import bq_utils
import utils.bq
from notebooks import parameters

Expand All @@ -42,8 +41,8 @@
q4_2018_hypo_obs_card_query = """
SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita
FROM
Expand Down Expand Up @@ -190,8 +189,8 @@
q2_2019_hypo_obs_card_query = """
SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita
FROM
Expand Down Expand Up @@ -339,14 +338,14 @@
SELECT
DISTINCT
q4.*, q2.*, (SUM(q2.num_persons) - SUM(q4.old_num_persons)) as person_difference,
(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference
(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference
FROM
(SELECT
DISTINCT
co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name,
COUNT(DISTINCT p.person_id) AS old_num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as old_num_records,
co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name,
COUNT(DISTINCT p.person_id) AS old_num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as old_num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as old_records_per_capita
FROM
Expand Down Expand Up @@ -378,13 +377,13 @@
GROUP BY 1, 2
ORDER BY old_num_persons DESC) q4
LEFT JOIN
(SELECT
DISTINCT
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons,
COUNT(DISTINCT co.condition_occurrence_id) as num_records,
ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita
FROM
Expand Down Expand Up @@ -416,10 +415,10 @@
GROUP BY 1, 2
ORDER BY num_persons DESC) q2
ON
q4.old_condition_concept_id = q2.condition_concept_id
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
ORDER BY old_num_persons DESC
Expand Down
27 changes: 13 additions & 14 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import warnings

import bq_utils
import utils.bq
from notebooks import parameters
warnings.filterwarnings('ignore')
Expand All @@ -33,11 +32,11 @@ def get_hpo_table_columns(hpo_id):
:param hpo_id: hpo site id
:return: dataframe with table name, column name and table row count
"""
query = """SELECT table_name, column_name, t.row_count as table_row_count, '{hpo_id}' as hpo_id
query = """SELECT table_name, column_name, t.row_count as table_row_count, '{hpo_id}' as hpo_id
FROM {dataset}.INFORMATION_SCHEMA.COLUMNS c
JOIN {dataset}.__TABLES__ t on c.table_name=t.table_id
WHERE STARTS_WITH(table_id, lower('{hpo_id}'))=true AND
NOT(table_id like '_mapping%') AND
NOT(table_id like '_mapping%') AND
(
table_id like '%person' OR
table_id like '%visit_occurrence' OR
Expand All @@ -59,25 +58,25 @@ def get_hpo_table_columns(hpo_id):


def create_hpo_completeness_query(table_columns, hpo_id):
query_with_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated
query_with_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated
FROM (
SELECT '{table_name}' as table_name, '{column_name}' as column_name,
'{hpo_id}' as site_name,
{table_row_count} as total_rows,
{table_row_count} as total_rows,
sum(case when {column_name}=0 then 0 else 1 end) as num_nonnulls_zeros,
({table_row_count} - count({column_name})) as non_populated_rows
FROM {dataset}.{table_name}
) as x
({table_row_count} - count({column_name})) as non_populated_rows
FROM {dataset}.{table_name}
) as x
"""
query_without_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated
query_without_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated
FROM (
SELECT '{table_name}' as table_name, '{column_name}' as column_name,
'{hpo_id}' as site_name,
{table_row_count} as total_rows,
count({column_name}) as num_nonnulls_zeros,
({table_row_count} - count({column_name})) as non_populated_rows
FROM {dataset}.{table_name}
) as x
{table_row_count} as total_rows,
count({column_name}) as num_nonnulls_zeros,
({table_row_count} - count({column_name})) as non_populated_rows
FROM {dataset}.{table_name}
) as x
"""
queries = []
for i, row in table_columns.iterrows():
Expand Down
15 changes: 7 additions & 8 deletions data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
# +
import bq_utils
import utils.bq
from notebooks import render, parameters
import pandas as pd
Expand All @@ -27,19 +26,19 @@

MULTIRACIAL_DIST_QUERY = """
WITH race_combo AS
(SELECT o.person_id,
o.questionnaire_response_id,
(SELECT o.person_id,
o.questionnaire_response_id,
STRING_AGG(REPLACE(c.concept_code, 'WhatRaceEthnicity_', ''), ' ' ORDER BY value_source_value) selected_races
FROM {DATASET}.observation o
JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id
JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id
WHERE observation_source_concept_id = 1586140
GROUP BY person_id, questionnaire_response_id)
SELECT
selected_races,
SELECT
selected_races,
(LENGTH(selected_races) - LENGTH(REPLACE(selected_races, ' ', '')) + 1) AS selected_count,
COUNT(DISTINCT person_id) row_count
FROM race_combo
FROM race_combo
GROUP BY selected_races
ORDER BY selected_count, selected_races
"""
Expand Down
Loading

0 comments on commit 9961da6

Please sign in to comment.