diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py index 6a4cae5c22..137cf93d92 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/cdr_person_id.py @@ -1,7 +1,6 @@ # # Person # ## Person ID validation -import bq_utils import utils.bq from notebooks.parameters import RDR_DATASET_ID, EHR_DATASET_ID @@ -13,7 +12,7 @@ hpo_ids = utils.bq.query(""" SELECT REPLACE(table_id, '_person', '') AS hpo_id FROM `{EHR_DATASET_ID}.__TABLES__` -WHERE table_id LIKE '%person' +WHERE table_id LIKE '%person' AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%' """.format(EHR_DATASET_ID=EHR_DATASET_ID)).hpo_id.tolist() @@ -31,7 +30,7 @@ (SELECT COUNT(1) AS n FROM {EHR_DATASET_ID}.{h}_person e WHERE NOT EXISTS( - SELECT 1 + SELECT 1 FROM {RDR_DATASET_ID}.person r WHERE r.person_id = e.person_id)) not_in_rdr ON TRUE @@ -63,31 +62,31 @@ RDR_EHR_NAME_MATCH_QUERY = ''' WITH rdr_first_name AS - (SELECT DISTINCT person_id, - FIRST_VALUE(value_as_string) + (SELECT DISTINCT person_id, + FIRST_VALUE(value_as_string) OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val FROM {RDR_DATASET_ID}.observation WHERE observation_source_value = 'PIIName_First'), rdr_last_name AS - (SELECT DISTINCT person_id, - FIRST_VALUE(value_as_string) + (SELECT DISTINCT person_id, + FIRST_VALUE(value_as_string) OVER (PARTITION BY person_id, observation_source_value ORDER BY value_as_string) val FROM {RDR_DATASET_ID}.observation WHERE observation_source_value = 'PIIName_Last'), rdr_name AS - (SELECT + (SELECT f.person_id person_id, - f.val first_name, + f.val first_name, l.val last_name FROM rdr_first_name f JOIN rdr_last_name l USING (person_id)) SELECT '{HPO_ID}' hpo_id, - rdr.person_id rdr_person_id, - rdr.first_name rdr_first_name, - rdr.last_name rdr_last_name, + rdr.person_id rdr_person_id, + rdr.first_name rdr_first_name, + rdr.last_name rdr_last_name, pii.person_id pii_person_id, pii.first_name pii_first_name, pii.middle_name pii_middle_name, @@ -97,7 +96,7 @@ FROM rdr_name rdr JOIN `{EHR_DATASET_ID}.{HPO_ID}_pii_name` pii ON pii.first_name = rdr.first_name - AND pii.last_name = rdr.last_name + AND pii.last_name = rdr.last_name LEFT JOIN `{EHR_DATASET_ID}.{HPO_ID}_person` p ON pii.person_id = p.person_id ''' diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py index a119882de8..8bdcb29c29 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/cohort_testing.py @@ -21,7 +21,6 @@ # - Record count (condition_occurrence) # # - We want to determine if these fluctuations are potentially caused by OMOP vocabulary issues. If this is the case, we should be able to determine similar trends in AoU data. 
-import bq_utils import utils.bq from notebooks import parameters @@ -42,8 +41,8 @@ q4_2018_hypo_obs_card_query = """ SELECT DISTINCT -co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, -COUNT(DISTINCT co.condition_occurrence_id) as num_records, +co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, +COUNT(DISTINCT co.condition_occurrence_id) as num_records, ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita FROM @@ -190,8 +189,8 @@ q2_2019_hypo_obs_card_query = """ SELECT DISTINCT -co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, -COUNT(DISTINCT co.condition_occurrence_id) as num_records, +co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, +COUNT(DISTINCT co.condition_occurrence_id) as num_records, ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita FROM @@ -339,14 +338,14 @@ SELECT DISTINCT q4.*, q2.*, (SUM(q2.num_persons) - SUM(q4.old_num_persons)) as person_difference, -(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference +(SUM(q2.num_records) - SUM(q4.old_num_records)) as record_difference FROM (SELECT DISTINCT - co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name, - COUNT(DISTINCT p.person_id) AS old_num_persons, - COUNT(DISTINCT co.condition_occurrence_id) as old_num_records, + co.condition_concept_id as old_condition_concept_id, c.concept_name as old_concept_name, + COUNT(DISTINCT p.person_id) AS old_num_persons, + COUNT(DISTINCT co.condition_occurrence_id) as old_num_records, ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as old_records_per_capita FROM @@ -378,13 +377,13 @@ GROUP BY 1, 2 ORDER BY old_num_persons DESC) q4 - + LEFT JOIN - + (SELECT DISTINCT - co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, - COUNT(DISTINCT co.condition_occurrence_id) as num_records, + co.condition_concept_id, c.concept_name, COUNT(DISTINCT p.person_id) AS num_persons, + COUNT(DISTINCT co.condition_occurrence_id) as num_records, ROUND(COUNT(DISTINCT co.condition_occurrence_id) / COUNT(DISTINCT p.person_id), 2) as records_per_capita FROM @@ -416,10 +415,10 @@ GROUP BY 1, 2 ORDER BY num_persons DESC) q2 - + ON q4.old_condition_concept_id = q2.condition_concept_id - + GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ORDER BY old_num_persons DESC diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/coverage.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/coverage.py index 23755d5bf7..b136297338 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/coverage.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/coverage.py @@ -14,7 +14,6 @@ import warnings -import bq_utils import utils.bq from notebooks import parameters warnings.filterwarnings('ignore') @@ -33,11 +32,11 @@ def get_hpo_table_columns(hpo_id): :param hpo_id: hpo site id :return: dataframe with table name, column name and table row count """ - query = """SELECT table_name, column_name, t.row_count as table_row_count, '{hpo_id}' as hpo_id + query = """SELECT table_name, column_name, t.row_count as table_row_count, '{hpo_id}' as hpo_id FROM {dataset}.INFORMATION_SCHEMA.COLUMNS c JOIN {dataset}.__TABLES__ t on c.table_name=t.table_id WHERE STARTS_WITH(table_id, lower('{hpo_id}'))=true AND - NOT(table_id like '_mapping%') AND + NOT(table_id like '_mapping%') AND ( table_id like 
'%person' OR table_id like '%visit_occurrence' OR @@ -59,25 +58,25 @@ def get_hpo_table_columns(hpo_id): def create_hpo_completeness_query(table_columns, hpo_id): - query_with_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated + query_with_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated FROM ( SELECT '{table_name}' as table_name, '{column_name}' as column_name, '{hpo_id}' as site_name, - {table_row_count} as total_rows, + {table_row_count} as total_rows, sum(case when {column_name}=0 then 0 else 1 end) as num_nonnulls_zeros, - ({table_row_count} - count({column_name})) as non_populated_rows - FROM {dataset}.{table_name} - ) as x + ({table_row_count} - count({column_name})) as non_populated_rows + FROM {dataset}.{table_name} + ) as x """ - query_without_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated + query_without_concept_id = """SELECT current_datetime() as report_run_time, x.*, CASE WHEN total_rows=0 THEN 0 ELSE (num_nonnulls_zeros)/(total_rows) END as percent_field_populated FROM ( SELECT '{table_name}' as table_name, '{column_name}' as column_name, '{hpo_id}' as site_name, - {table_row_count} as total_rows, - count({column_name}) as num_nonnulls_zeros, - ({table_row_count} - count({column_name})) as non_populated_rows - FROM {dataset}.{table_name} - ) as x + {table_row_count} as total_rows, + count({column_name}) as num_nonnulls_zeros, + ({table_row_count} - count({column_name})) as non_populated_rows + FROM {dataset}.{table_name} + ) as x """ queries = [] for i, row in table_columns.iterrows(): diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py index d9836fe200..9a74fe4da5 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/deid_race.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- # + -import bq_utils import utils.bq from notebooks import render, parameters import pandas as pd @@ -27,19 +26,19 @@ MULTIRACIAL_DIST_QUERY = """ WITH race_combo AS -(SELECT o.person_id, - o.questionnaire_response_id, +(SELECT o.person_id, + o.questionnaire_response_id, STRING_AGG(REPLACE(c.concept_code, 'WhatRaceEthnicity_', ''), ' ' ORDER BY value_source_value) selected_races FROM {DATASET}.observation o - JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id + JOIN {VOCAB}.concept c ON o.value_source_concept_id = c.concept_id WHERE observation_source_concept_id = 1586140 GROUP BY person_id, questionnaire_response_id) - -SELECT - selected_races, + +SELECT + selected_races, (LENGTH(selected_races) - LENGTH(REPLACE(selected_races, ' ', '')) + 1) AS selected_count, COUNT(DISTINCT person_id) row_count -FROM race_combo +FROM race_combo GROUP BY selected_races ORDER BY selected_count, selected_races """ diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_demographics_by_site.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_demographics_by_site.py index db02e480c9..442b4d6c51 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_demographics_by_site.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_demographics_by_site.py @@ -26,7 +26,6 @@ client = 
bigquery.Client() # %load_ext google.cloud.bigquery -import bq_utils import utils.bq from notebooks import parameters # %matplotlib inline @@ -95,18 +94,18 @@ racial_distribution_by_site_query = """ SELECT DISTINCT -a.*, b.number_from_site, ROUND(a.number_of_demographic / b.number_from_site * 100, 2) as percent_of_site_persons +a.*, b.number_from_site, ROUND(a.number_of_demographic / b.number_from_site * 100, 2) as percent_of_site_persons FROM (SELECT DISTINCT - mp.src_hpo_id, p.race_concept_id, c.concept_name, + mp.src_hpo_id, p.race_concept_id, c.concept_name, COUNT(p.race_concept_id) as number_of_demographic, FROM `{DATASET}.unioned_ehr_person` p LEFT JOIN `{DATASET}._mapping_person` mp ON - p.person_id = mp.src_person_id + p.person_id = mp.src_person_id LEFT JOIN `{DATASET}.concept` c ON @@ -141,17 +140,17 @@ def return_hpos_to_display(hpo_names, max_num_sites_to_display): Function is intended to return a means to divide the number of HPOs into an appropriate number of lists based on the maximum number of sites a user wants to display. - + This is useful for creating graphs that will only display a fraction of the total HPOs. - + Parameters ---------- hpo_names (list): list of all the health provider organizations (in string form) - + num_sites_to_display (int): user-specified number of sites to display in each graph - - + + Returns ------- all_hpos (list): contains several lists, each of which contains a number of sites @@ -195,17 +194,17 @@ def create_information_dictionary_for_sites(hpo_dfs, selected_hpo_names, """ Function is used to create a dictionary that contains the racial makeup of a selected number of sites (expressed as a percentage, from a source dataframe) - + Parameters ---------- hpo_dfs (dictionary): has the following structure key: string representing an HPO ID value: dataframe that contains information about the different race concepts (IDs and names) and their relative spread within the site - + selected_hpo_names (list): contains strings that represent the different HPOs that will ultimately be translated to a dictionary - + most_popular_race_cids (list): list of the most popular concept IDs (across all sites) @@ -253,23 +252,23 @@ def create_information_dictionary_for_sites(hpo_dfs, selected_hpo_names, def create_graphs(hpo_names_to_display, num_races_for_legend, racial_percentages, img_name): """ - Function is used to create and save graphs that show the racial distribution for + Function is used to create and save graphs that show the racial distribution for a selected number of sites - + Parameters ---------- hpo_names_to_display (list): list with a user-specified number of HPOs that are to be displayed in the graph - + num_races_for_legend (int): the number of races that are to be displayed next to the graph - + racial_percentages (dictionary): has the following structure key: race concept ID value: list, each index represents one of the sites in the 'selected_hpo_names' parameter. 
the value represents the proportion of persons from the HPO who have the reported race concept ID - + img_name (string): name for the image to be displayed """ num_sites_to_display = len(hpo_names_to_display) @@ -408,46 +407,46 @@ def create_query_for_particular_table(dataset, percent_of_table, table_name): - number of IDs for that particular group in the specified table - total number of IDs for the HPO - percentage of the records for the site that belong to that demographic class - + This query is then run through BigQuery and returns a dataframe - - + + Parameters ---------- dataset (str): dataset to be queried (defined at the top of the workbook) - + percent_of_table (str): the string to represent the percentage of the records for the site that belong to the particular demographic class - + table_name (str): name of the table to be investigated - - + + Returns ------- dataframe (df): contains the information specified in the top of the docstring - + """ query = """ SELECT DISTINCT - a.src_hpo_id, a.race_concept_id, a.concept_name, - ROUND(a.number_of_demographic / b.number_from_site * 100, 2) as {percent_of_table} + a.src_hpo_id, a.race_concept_id, a.concept_name, + ROUND(a.number_of_demographic / b.number_from_site * 100, 2) as {percent_of_table} FROM (SELECT DISTINCT - mp.src_hpo_id, p.race_concept_id, c.concept_name, + mp.src_hpo_id, p.race_concept_id, c.concept_name, COUNT(p.race_concept_id) as number_of_demographic, FROM `{dataset}.unioned_ehr_{table_name}` x LEFT JOIN `{dataset}.unioned_ehr_person` p ON - x.person_id = p.person_id + x.person_id = p.person_id LEFT JOIN `{dataset}._mapping_person` mp ON - p.person_id = mp.src_person_id + p.person_id = mp.src_person_id LEFT JOIN `{dataset}.concept` c ON @@ -464,7 +463,7 @@ def create_query_for_particular_table(dataset, percent_of_table, table_name): LEFT JOIN `{dataset}.unioned_ehr_person` p ON - x.person_id = p.person_id + x.person_id = p.person_id LEFT JOIN `{dataset}._mapping_person` mp ON @@ -549,13 +548,13 @@ def find_all_distributions_for_site_race_combo(df, hpo, race, This function is used to calculate the relative 'underrepresentation' of a given race for a particular table when compared to the race's overall representation in the person table. - + For instance, a site may have 65% of participants who identify as 'White'. The persons who identify with this race, however, only make up 60% of the drug_exposure_ids in the drug exposure table. This would result in an 'underrepresentation' of 5% for persons at this particular site for this particular table. - - + + Parameters ---------- df (df): dataframe that contains the following information in its fields: @@ -567,15 +566,15 @@ aforementioned race_concept_id e. 
the same metric as d but also for the condition, observation, procedure, and visit tables - + hpo (string): HPO whose 'representation' metric is going to be assessed - + race (string): race concept name that will be evaluated for 'representation' - + person_distribution: the proportion of person_ids for the particular site that belong to the aforementioned race - - + + Returns ------- difference_df: contains the 'difference' between the proportion of records diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_ops_row_counts.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_ops_row_counts.py index deb579b9d3..2f2501fb8f 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_ops_row_counts.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/ehr_ops_row_counts.py @@ -16,7 +16,6 @@ # + import datetime -import bq_utils import utils.bq from notebooks.parameters import RDR_PROJECT_ID, RDR_DATASET_ID, EHR_DATASET_ID @@ -65,17 +64,17 @@ # ## EHR Site Submission Counts utils.bq.query(''' -SELECT +SELECT l.Org_ID AS org_id, l.HPO_ID AS hpo_id, l.Site_Name AS site_name, - table_id AS table_id, + table_id AS table_id, row_count AS row_count FROM `{EHR_DATASET_ID}.__TABLES__` AS t -JOIN `lookup_tables.hpo_site_id_mappings` AS l +JOIN `lookup_tables.hpo_site_id_mappings` AS l ON STARTS_WITH(table_id,lower(l.HPO_ID))=true WHERE table_id like '%person%' AND -NOT(table_id like '%unioned_ehr_%') AND +NOT(table_id like '%unioned_ehr_%') AND l.hpo_id <> '' ORDER BY Display_Order '''.format(EHR_DATASET_ID=EHR_DATASET_ID)) @@ -84,7 +83,7 @@ hpo_ids = utils.bq.query(""" SELECT REPLACE(table_id, '_person', '') AS hpo_id FROM `{EHR_DATASET_ID}.__TABLES__` -WHERE table_id LIKE '%person' +WHERE table_id LIKE '%person' AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%' """.format(EHR_DATASET_ID=EHR_DATASET_ID)).hpo_id.tolist() diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/generalized_dupes.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/generalized_dupes.py index 271dde74a1..d6dc0ce230 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/generalized_dupes.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/generalized_dupes.py @@ -1,5 +1,4 @@ # + -import bq_utils import utils.bq from notebooks import parameters @@ -17,7 +16,7 @@ COUNT(*) FROM `{DEID}.observation` AS o -JOIN +JOIN ( SELECT observation_id @@ -31,7 +30,7 @@ observation_id DESC) AS rank_order, observation_id FROM - `{DEID}.observation` + `{DEID}.observation` JOIN `{COMBINED}._mapping_observation` as map USING @@ -40,9 +39,9 @@ AND value_source_concept_id IN (2000000008, 2000000005, 2000000004, 2000000002) AND map.src_hpo_id like "rdr" ) o - WHERE + WHERE o.rank_order <> 1 -) unique_observation_ids +) unique_observation_ids ON o.observation_id = unique_observation_ids.observation_id """ q = DUPLICATE_GEN_RACE_QUERY.format(DEID=DEID, COMBINED=COMBINED) diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/identify_required_labs.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/identify_required_labs.py index c2bc3db3f9..b9b58a0c3f 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/identify_required_labs.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/identify_required_labs.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import bq_utils import utils.bq from notebooks import render @@ -27,13 +26,13 @@ -- 36208195 Lab terms not yet categorized -- 36207527 Clinical terms not yet categorized -- 36210656 Survey terms not yet categorized - - -- Exclude the list of 
the "coarse" generalized concept ids -- 40772590: Cholesterol -- 40782521: Leukocytes -- 40779250: Protein in the grandparent lookup - SELECT - excluded_ancestor_concept_id + SELECT + excluded_ancestor_concept_id FROM UNNEST([36208978, 36206173, 36208195, 36207527, 36210656, 40782521, 40779250, 40772590]) AS excluded_ancestor_concept_id ), @@ -51,22 +50,22 @@ IF(ex.excluded_ancestor_concept_id IS NULL, COALESCE(ca.min_levels_of_separation, -1), -1) AS distance FROM `ehr_ops.measurement_concept_sets` AS m - JOIN + JOIN `{VOCAB_DATASET_ID}.concept` AS c1 - ON + ON m.Measurement_OMOP_ID = c1.concept_id LEFT JOIN `{VOCAB_DATASET_ID}.concept_ancestor` AS ca ON - m.Measurement_OMOP_ID = ca.descendant_concept_id + m.Measurement_OMOP_ID = ca.descendant_concept_id AND ca.min_levels_of_separation = 1 - LEFT JOIN + LEFT JOIN get_excluded_ancestor_ids AS ex - ON + ON ca.ancestor_concept_id = ex.excluded_ancestor_concept_id LEFT JOIN `{VOCAB_DATASET_ID}.concept` AS c2 - ON + ON ca.ancestor_concept_id = c2.concept_id WHERE c2.concept_class_id IS NULL OR c2.concept_class_id = 'LOINC Group' ), @@ -85,23 +84,23 @@ IF(ex.excluded_ancestor_concept_id IS NULL, COALESCE(ca.min_levels_of_separation, -1), -1) AS distance FROM `ehr_ops.measurement_concept_sets` AS m - JOIN + JOIN `{VOCAB_DATASET_ID}.concept` AS c1 - ON + ON m.Measurement_OMOP_ID = c1.concept_id LEFT JOIN `{VOCAB_DATASET_ID}.concept_ancestor` AS ca ON - m.Measurement_OMOP_ID = ca.descendant_concept_id + m.Measurement_OMOP_ID = ca.descendant_concept_id AND ca.min_levels_of_separation IN (1, 2) - LEFT JOIN + LEFT JOIN get_excluded_ancestor_ids AS ex - ON + ON ca.ancestor_concept_id = ex.excluded_ancestor_concept_id LEFT JOIN `{VOCAB_DATASET_ID}.concept` AS c2 - ON - ca.ancestor_concept_id = c2.concept_id + ON + ca.ancestor_concept_id = c2.concept_id WHERE -- if there are no ancestors for the measurement_concept_id (ca.descendant_concept_id IS NULL) @@ -112,15 +111,15 @@ -- if the level of separation is 2, we keep them only when the concept_name subsumes the grandparent concept_name (c2.concept_class_id = 'LOINC Hierarchy' AND ca.min_levels_of_separation = 2 AND c1.concept_name LIKE CONCAT('%', c2.concept_name , '%')) OR - -- if the level of separation is 2, the 6 concept names (such as MCH [Entitic mass], MCV [Entitic volume]) do not follow the previous rule, + -- if the level of separation is 2, the 6 concept names (such as MCH [Entitic mass], MCV [Entitic volume]) do not follow the previous rule, -- because the acronyms are used in the concept_name and full names are used in the grandparent concept_name (c2.concept_class_id = 'LOINC Hierarchy' AND ca.min_levels_of_separation = 2 AND c1.concept_id IN (3035941, 3024731, 3003338, 3012030, 3009744, 3023599)) ), -get_ancestors_loinc_hierarchy_distinct AS +get_ancestors_loinc_hierarchy_distinct AS ( - # For some concepts in LOINC Hierarchy, we include both parent and grandparent concept_ids, - # We want to remove the parent concept_id if the grandparent concept_id is present. + # For some concepts in LOINC Hierarchy, we include both parent and grandparent concept_ids, + # We want to remove the parent concept_id if the grandparent concept_id is present. 
SELECT DISTINCT Panel_OMOP_ID, Panel_Name, @@ -132,7 +131,7 @@ distance FROM ( - SELECT DISTINCT + SELECT DISTINCT *, dense_rank() over(PARTITION BY measurement_concept_id ORDER BY distance DESC) AS rank_order FROM get_ancestors_loinc_hierarchy @@ -142,9 +141,9 @@ get_loinc_group_descendant_concept_ids AS ( - # We use left join to concept_ancestor because not all the concepts have an ancestor, in which case + # We use left join to concept_ancestor because not all the concepts have an ancestor, in which case # we make the measurement_concept_id its own ancestor - SELECT + SELECT lg.Panel_OMOP_ID, lg.Panel_Name, lg.measurement_concept_id, @@ -157,18 +156,18 @@ COALESCE(c1.concept_class_id, lg.parent_concept_class_id) AS loinc_groupy_descendant_concept_class_id, COALESCE(ca1.min_levels_of_separation, -1) AS distance FROM get_direct_parents_loinc_group AS lg - LEFT JOIN + LEFT JOIN {VOCAB_DATASET_ID}.concept_ancestor AS ca1 ON - lg.parent_concept_id = ca1.ancestor_concept_id + lg.parent_concept_id = ca1.ancestor_concept_id AND ca1.min_levels_of_separation <> 0 LEFT JOIN {VOCAB_DATASET_ID}.concept AS c1 - ON ca1.descendant_concept_id = c1.concept_id + ON ca1.descendant_concept_id = c1.concept_id ), get_loinc_hierarchy_descendant_concept_ids AS ( - # We use left join to concept_ancestor because not all the concepts have an ancestor, in which case + # We use left join to concept_ancestor because not all the concepts have an ancestor, in which case # we make the measurement_concept_id its own ancestor SELECT lh.Panel_OMOP_ID, @@ -183,19 +182,19 @@ COALESCE(c1.concept_class_id, lh.ancestor_concept_class_id) AS loinc_hierarchy_descendant_concept_class_id, COALESCE(ca1.min_levels_of_separation, -1) AS distance FROM get_ancestors_loinc_hierarchy_distinct AS lh - LEFT JOIN + LEFT JOIN {VOCAB_DATASET_ID}.concept_ancestor AS ca1 ON lh.ancestor_concept_id = ca1.ancestor_concept_id AND ca1.min_levels_of_separation <> 0 LEFT JOIN {VOCAB_DATASET_ID}.concept AS c1 - ON ca1.descendant_concept_id = c1.concept_id + ON ca1.descendant_concept_id = c1.concept_id ), get_measurement_concept_sets_descendants AS ( - # We use a full outer join between the loinc_hierarchy descendants and loinc_group descendants - # in order to maximize the number of descendants retrieved by both classification systems. + # We use a full outer join between the loinc_hierarchy descendants and loinc_group descendants + # in order to maximize the number of descendants retrieved by both classification systems. 
SELECT DISTINCT COALESCE(lh.Panel_OMOP_ID, lg.Panel_OMOP_ID) AS panel_omop_id, COALESCE(lh.Panel_Name, lg.Panel_Name) AS panel_name, @@ -213,7 +212,7 @@ COALESCE(lh.loinc_hierarchy_descendant_concept_name, lg.loinc_groupy_descendant_concept_name) AS descendant_concept_name, COALESCE(lh.loinc_hierarchy_descendant_concept_class_id, lg.loinc_groupy_descendant_concept_class_id) AS descendant_concept_class_id FROM get_loinc_hierarchy_descendant_concept_ids AS lh - FULL OUTER JOIN + FULL OUTER JOIN get_loinc_group_descendant_concept_ids AS lg ON lh.loinc_hierarchy_descendant_concept_id = lg.loinc_groupy_descendant_concept_id @@ -228,20 +227,20 @@ COUNT(DISTINCT person_id) AS n_person, COUNT(DISTINCT measurement_id) AS n_meas, COUNT(DISTINCT descendant_concept_id) AS n_descendant -FROM +FROM ( SELECT measurement_id, person_id, IF(measurement_concept_id IS NULL OR measurement_concept_id=0, measurement_source_concept_id, measurement_concept_id) AS measurement_concept_id FROM - `{DATASET_ID}.measurement` + `{DATASET_ID}.measurement` ) meas JOIN `{DATASET_ID}._mapping_measurement` USING (measurement_id) -JOIN +JOIN get_measurement_concept_sets_descendants AS valid_lab ON meas.measurement_concept_id = valid_lab.descendant_concept_id @@ -251,7 +250,7 @@ 3, 4, 5 -ORDER BY +ORDER BY 1,2 """ diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/list_tables_with_duplicate_domain_ids.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/list_tables_with_duplicate_domain_ids.py index 4baab0bc7b..ca0370ff17 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/list_tables_with_duplicate_domain_ids.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/list_tables_with_duplicate_domain_ids.py @@ -12,7 +12,6 @@ # name: python3 # --- -import bq_utils import utils.bq from notebooks import parameters @@ -26,7 +25,7 @@ query = """ SELECT REPLACE(table_id, '_person', '') AS hpo_id FROM `{bq_dataset_id}.__TABLES__` -WHERE table_id LIKE '%person' +WHERE table_id LIKE '%person' AND table_id NOT LIKE '%unioned_ehr_%' AND table_id NOT LIKE '\\\_%' """.format(bq_dataset_id=bigquery_dataset_id) hpo_ids = utils.bq.query(query).tolist() @@ -48,7 +47,7 @@ FROM prod_drc_dataset.__TABLES__ T LEFT JOIN (select distinct '{h}_{d}' as table_name, count(*) as num_dups -from `{bq_dataset_id}.{h}_{d}` +from `{bq_dataset_id}.{h}_{d}` group by {d}_id having count(*) > 1 order by num_dups desc diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/person_gender_sex.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/person_gender_sex.py index 9311b51082..616d11a5c6 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/person_gender_sex.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/person_gender_sex.py @@ -7,7 +7,6 @@ # * `sex_at_birth_concept_id` contains the associated `value_as_concept_id` # * `sex_at_birth_source_concept_id` contains the associated `value_source_concept_id` # * `sex_at_birth_source_value` contains the `concept_code` associated with `sex_at_birth_source_concept_id` -import bq_utils import utils.bq from notebooks import render from notebooks.parameters import SANDBOX, DEID_DATASET_ID @@ -88,7 +87,7 @@ def df_to_gbq(df, destination_table, table_schema=None): # - UPDATED_PERSON_QUERY = """ -SELECT +SELECT p.person_id, g.gender_concept_id, p.year_of_birth, @@ -127,7 +126,7 @@ def df_to_gbq(df, destination_table, table_schema=None): table_schema=person_schema) PERSON_HIST_QUERY = """ -SELECT +SELECT p.gender_concept_id, p.gender_source_value, p.gender_source_concept_id, diff --git 
a/data_steward/analytics/cdr_ops/ad_hoc_analyses/pop_retract.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/pop_retract.py index 654cd90dd0..1fdf9b2241 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/pop_retract.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/pop_retract.py @@ -1,7 +1,6 @@ # + from jinja2 import Template -import bq_utils import utils.bq from notebooks import render from notebooks.defaults import is_deid_dataset @@ -36,14 +35,14 @@ # Determine associated research IDs for RDR participants whose data must be retracted AIAN_PID_QUERY = """ -SELECT DISTINCT +SELECT DISTINCT rdr.person_id AS person_id, deid.research_id AS research_id FROM `{RDR}.observation` rdr JOIN `{COMBINED}.deid_map` deid ON rdr.person_id = deid.person_id -WHERE - rdr.observation_source_concept_id = 1586140 +WHERE + rdr.observation_source_concept_id = 1586140 AND rdr.value_source_concept_id = 1586141 """ q = AIAN_PID_QUERY.format(RDR=RDR, COMBINED=COMBINED) @@ -80,24 +79,24 @@ def get_tables_with_person_id(input_dataset): WITH delete_row_counts AS ( {% for table in TABLES %} ( - SELECT '{{ table }}' AS table_name, + SELECT '{{ table }}' AS table_name, COUNT(1) AS rows_to_delete, (SELECT row_count FROM {{ INPUT_DATASET }}.__TABLES__ WHERE table_id = '{{ table }}') AS total_rows FROM `{{ INPUT_DATASET }}.{{ table }}` t WHERE EXISTS ( - SELECT 1 FROM `{{ ID_TABLE }}` + SELECT 1 FROM `{{ ID_TABLE }}` WHERE {{ 'research_id' if IS_INPUT_DATASET_DEID else 'person_id' }} = t.person_id) - ) + ) {% if not loop.last %} UNION ALL - {% endif %} + {% endif %} {% endfor %} ) -SELECT - d.table_name, +SELECT + d.table_name, d.total_rows AS input_row_count, d.rows_to_delete AS rows_to_delete, - d.total_rows - d.rows_to_delete AS expected_output_row_count, + d.total_rows - d.rows_to_delete AS expected_output_row_count, t.row_count AS actual_output_row_count, t.row_count = (d.total_rows - d.rows_to_delete) AS pass FROM delete_row_counts d diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/sex_gender_540.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/sex_gender_540.py index a12fb65d5e..aaa610554a 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/sex_gender_540.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/sex_gender_540.py @@ -28,7 +28,6 @@ # - gender_concept_id = value_as_concept_id # - gender_source_value = concept_code associated with value_source_concept_id # - gender_source_concept_id = value_source_concept_id -import bq_utils import utils.bq from notebooks import render, parameters import pandas as pd diff --git a/data_steward/analytics/cdr_ops/ad_hoc_analyses/site_mapping.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/site_mapping.py index 57b6730374..71cfbf19cd 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/site_mapping.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/site_mapping.py @@ -1,5 +1,4 @@ # + -import bq_utils import utils.bq from notebooks import render, parameters @@ -12,10 +11,10 @@ # ## Row counts in combined `_mapping*` and deid `*_ext` tables ROW_COUNTS_QUERY = """ -SELECT dataset_id, - REPLACE(REPLACE(table_id, '_mapping_', ''), '_ext', '') mapped_table, - table_id, - creation_time, +SELECT dataset_id, + REPLACE(REPLACE(table_id, '_mapping_', ''), '_ext', '') mapped_table, + table_id, + creation_time, last_modified_time, row_count FROM @@ -25,7 +24,7 @@ UNION ALL - SELECT * + SELECT * FROM {COMBINED}.__TABLES__ d1 WHERE table_id LIKE '\\\_mapping\\\_%') diff --git 
a/data_steward/analytics/cdr_ops/ad_hoc_analyses/standard_concepts_cdr_389.py b/data_steward/analytics/cdr_ops/ad_hoc_analyses/standard_concepts_cdr_389.py index bac69c94ec..47af6fe87d 100644 --- a/data_steward/analytics/cdr_ops/ad_hoc_analyses/standard_concepts_cdr_389.py +++ b/data_steward/analytics/cdr_ops/ad_hoc_analyses/standard_concepts_cdr_389.py @@ -33,7 +33,6 @@ # # # #### This notebook also does not exclude instances where the concept_id = 0. -import bq_utils import utils.bq from notebooks import parameters @@ -53,7 +52,7 @@ co_query = """ SELECT DISTINCT -co.condition_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, +co.condition_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, co_combined.condition_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%condition%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mco.src_hpo_id) as num_sites_w_change @@ -127,7 +126,7 @@ de_query = """ SELECT DISTINCT -de.drug_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, +de.drug_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, de_combined.drug_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%drug%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mde.src_hpo_id) as num_sites_w_change @@ -202,7 +201,7 @@ m_query = """ SELECT DISTINCT -m.measurement_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, +m.measurement_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, m_combined.measurement_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%measurement%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mm.src_hpo_id) as num_sites_w_change @@ -272,7 +271,7 @@ v_query = """ SELECT DISTINCT -v.visit_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, +v.visit_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, v_combined.visit_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%visit%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mv.src_hpo_id) as num_sites_w_change @@ -342,7 +341,7 @@ p_query = """ SELECT DISTINCT -p.procedure_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, +p.procedure_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, p_combined.procedure_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%procedure%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mp.src_hpo_id) as num_sites_w_change @@ -411,7 +410,7 @@ o_query = """ SELECT DISTINCT -o.observation_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, 
+o.observation_concept_id as pre_cr_concept_id, c1.standard_concept as pre_cr_standard_concept, c1.concept_name as pre_cr_cn, o_combined.observation_concept_id as post_cr_concept_id, c2.standard_concept as post_cr_standard_concept, c2.concept_name as post_cr_cn, (LOWER(c2.domain_id) LIKE '%observation%') as post_cr_domain_correct, COUNT(*) as count, COUNT(DISTINCT mo.src_hpo_id) as num_sites_w_change diff --git a/data_steward/analytics/cdr_ops/systematic_scripts/data_loss_through_pipeline.py b/data_steward/analytics/cdr_ops/systematic_scripts/data_loss_through_pipeline.py index f18712905c..984cf7e996 100644 --- a/data_steward/analytics/cdr_ops/systematic_scripts/data_loss_through_pipeline.py +++ b/data_steward/analytics/cdr_ops/systematic_scripts/data_loss_through_pipeline.py @@ -29,7 +29,6 @@ # %load_ext google.cloud.bigquery # %matplotlib inline -import bq_utils import utils.bq from notebooks import parameters import pandas as pd @@ -58,25 +57,25 @@ def create_dicts_w_info(df, x_label, column_label): """ This function is used to create a dictionary that can be easily converted to a graphical representation based on the values for a particular dataframe - + Parameters ---------- df (dataframe): dataframe that contains the information to be converted - + x_label (string): the column of the dataframe whose rows will then be converted to the keys of a dictionary - + column_label (string): the column that contains the data quality metric being investigated - + Returns ------- data_qual_info (dictionary): has the following structure - + keys: the column for a particular dataframe that represents the elements whose data quality is being compared (e.g. HPOs, different measurement/unit combinations) - + values: the data quality metric being compared """ rows = df[x_label].unique().tolist() @@ -98,28 +97,28 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, color, """ Function is used to create a bar graph for a particular dictionary with information about data quality - + Parameters ---------- info_dict (dictionary): contains information about data quality. The keys for the dictionary will serve as the x-axis labels whereas the values should serve as the 'y-value' for the particular bar - + xlabel (str): label to display across the x-axis - + ylabel (str): label to display across the y-axis - + title (str): title for the graph - + img_name (str): name used to save the image to the local repository - + color (str): character used to specify the colours of the bars - + total_diff_color (bool): indicates whether or not the last bar should be coloured red ( as opposed to the rest of the bars on the graph). This is typically used when the ultimate value of the dictionary is of particular importance (e.g. representing an 'aggregate' metric across all of the sites) - + turnoff_x (bool): used to disable the x-axis labels (for each of the bars). This is typically used when there are so many x-axis labels that they overlap and obscure legibility """ @@ -191,23 +190,23 @@ def create_pie_chart(dataframe, title, img_name): """ Function is used to create a pie chart that can show how much each site contributes to the overall 'drop' between the unioned and combined datasets - + Function also saves the outputted pie chart to the current directory - + Parameters ---------- - dataframe (df): dataframe for a particular table. shows the following for + dataframe (df): dataframe for a particular table. shows the following for HPOs that uploaded data: - + a. 
the number of rows in the unioned dataset b. the number of rows in the combined dataset c. the total 'drop' of rows across unioned to combined, expressed as a percentage d. the relative 'contribution' of each site to the overall drop from unioned to combined - - + + title (str): title of the graph - + img_name (str): title of the image to be saved """ hpo_list = dataframe['source_hpo'].tolist()[1:] # do not take 'total' @@ -246,24 +245,24 @@ a. generate a string that can be fed into BigQuery b. create a dataframe that contains information about the number of people and records for a particular dataset - + Parameters ---------- dataset (string): name of the dataset that will be queried (originally from the parameters file) - + person_var (string): variable that dictates how the 'number of people' will be displayed in the resultant dataframe - + record_var (string): variable that dictates how the 'number of records' will be displayed in the resultant dataframe - + table_name (string): represents the table that is being queried - + field_name (string): represents the field that should count the number of records for a particular dataset/table combination. this is usually 'table name'_id - + Returns ------- @@ -295,11 +294,11 @@ def generate_query(dataset, person_var, record_var, table_name, field_name): def extract_first_int_from_series(series): """ Function is used to extract the first integer from a Pandas series object. - + Parameters ---------- series (series): Pandas series object - + Returns ------- integer (int): the first integer from a Pandas series object @@ -319,38 +318,38 @@ def create_aggregate_table_df(unioned, combined, deid, unioned_persons_string, record_string): """ Function is used to create a dataframe that can display the 'drop off' of records across multiple - stages of the pipeline. - - + stages of the pipeline. + + Parameters: ----------- - + unioned (dataframe): contains information regarding the number of persons and records in the unioned dataset - + combined (dataframe): contains information regarding the number of persons and records in the combined dataset - + deid (dataframe): contains information regarding the number of persons and records in the deid dataset - + unioned_persons_string (str): column name to determine the number of persons in the unioned dataset - + combined_person_string (str): column name to determine the number of persons in the combined dataset - + deid_person_string (str): column name to determine the number of persons in the deid dataset - + unioned_records_string (str): column name to determine the number of records in the unioned dataset - + combined_records_string (str): column name to determine the number of records in the combined dataset deid_records_string (str): column name to determine the number of records in the deid dataset - + person_string (str): row title to indicate the person drop for each stage of the pipeline - + record_string (str): row title to indicate the record drop for each stage of the pipeline - - + + Returns: -------- df (dataframe): contains information about the record and person count drop across each stage of @@ -761,19 +760,19 @@ def generate_site_level_query(id_name, unioned, table_name, combined): b. the number of rows for the HPO for a particular table in the unioned dataset c. the number of rows for the HPO for a particular table in the combined dataset d. 
the total 'drop' of rows across unioned to combined, expressed as a percentage - + Parameters ---------- - id_name (string): represents the 'primary key' of the table (the unique identifier + id_name (string): represents the 'primary key' of the table (the unique identifier for each row) - + unioned (string): the name of the unioned dataset to be queried - + table_name (string): name of the table that is being investigated - + combined (string): the name of the combined dataset to be queried - - + + Returns ------- dataframe (df): contains all of the information outlined in the top of the docstring @@ -827,17 +826,17 @@ def add_total_drop_row(dataframe): """ Function is used to add a 'total' row at the bottom of a dataframe that shows the relative 'drop' across the pipeline (unioned to combined) for the different sites. - + This row will show: a. the number of rows in the unioned dataset b. the number of rows in the combined dataset c. the total 'drop' of rows across unioned to combined, expressed as a percentage - + Parameters: ---------- dataframe (df): dataframe for a particular table. shows a-c (above) for each of the HPOs that uploaded data - + Returns: -------- dataframe (df): the inputted dataframe with an additional 'total' row at the end @@ -869,16 +868,16 @@ def add_percent_of_drop_column(dataframe): Function is used to add a 'percent_of_drop' column that shows how much each site's 'drop' contributed to the 'overall' drop from the unioned to the combined steps of the pipeline. - + Parameters ---------- - dataframe (df): dataframe for a particular table. shows the following for + dataframe (df): dataframe for a particular table. shows the following for HPOs that uploaded data: - + a. the number of rows in the unioned dataset b. the number of rows in the combined dataset c. 
the total 'drop' of rows across unioned to combined, expressed as a percentage - + Returns ------- dataframe (df): the above dataframe with a new column that shows each site's diff --git a/data_steward/analytics/cdr_ops/systematic_scripts/date_disparity_with_respect_to_visit.py b/data_steward/analytics/cdr_ops/systematic_scripts/date_disparity_with_respect_to_visit.py index 532ca1febf..b75d78c410 100644 --- a/data_steward/analytics/cdr_ops/systematic_scripts/date_disparity_with_respect_to_visit.py +++ b/data_steward/analytics/cdr_ops/systematic_scripts/date_disparity_with_respect_to_visit.py @@ -38,7 +38,6 @@ # %load_ext google.cloud.bigquery # + -import bq_utils import utils.bq from notebooks import parameters @@ -72,11 +71,11 @@ p_v_query = """ SELECT DISTINCT -a.*, +a.*, (a.procedure_vis_start_diff + a.procedure_vis_end_diff + a.procedure_vis_start_dt_diff + a.procedure_vis_end_dt_diff + a.procedure_dt_vis_start_dt_diff + a.procedure_dt_vis_end_dt_diff) as total_diff -FROM +FROM ( SELECT - mpo.src_hpo_id, COUNT(mpo.src_hpo_id) as num_bad_records, + mpo.src_hpo_id, COUNT(mpo.src_hpo_id) as num_bad_records, IFNULL(ABS(DATE_DIFF(po.procedure_date, vo.visit_start_date, DAY)), 0) as procedure_vis_start_diff, IFNULL(ABS(DATE_DIFF(po.procedure_date, vo.visit_end_date, DAY)), 0) as procedure_vis_end_diff, IFNULL(ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), po.procedure_date, DAY)), 0) as procedure_vis_start_dt_diff, @@ -85,19 +84,19 @@ IFNULL(ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)), 0) as procedure_dt_vis_end_dt_diff, ( - ABS(DATE_DIFF(po.procedure_date, vo.visit_start_date, DAY)) = - ABS(DATE_DIFF(po.procedure_date, vo.visit_end_date, DAY)) + ABS(DATE_DIFF(po.procedure_date, vo.visit_start_date, DAY)) = + ABS(DATE_DIFF(po.procedure_date, vo.visit_end_date, DAY)) AND ABS(DATE_DIFF(po.procedure_date, vo.visit_end_date, DAY)) = - ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), po.procedure_date, DAY)) + ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), po.procedure_date, DAY)) AND ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), po.procedure_date, DAY)) = ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), po.procedure_date, DAY)) AND - ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), po.procedure_date, DAY)) = - ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) + ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), po.procedure_date, DAY)) = + ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) AND - ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = + ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = ABS(DATE_DIFF(CAST(po.procedure_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)) ) as all_discrepancies_equal @@ -131,7 +130,7 @@ OR po.procedure_date > vo.visit_end_date) - OR + OR -- problem with datetime (po.procedure_datetime < vo.visit_start_datetime OR @@ -142,9 +141,9 @@ (po.procedure_date < CAST(vo.visit_start_datetime AS DATE) OR po.procedure_date > CAST(vo.visit_end_datetime AS DATE)) - + OR - + --problem with the datetime (CAST(po.procedure_datetime AS DATE) < CAST(vo.visit_start_datetime AS DATE) OR @@ -188,28 +187,28 @@ def create_dicts_w_info(df, """ This function is used to create a dictionary that can be easily converted to a graphical representation based on the values for a particular dataframe - + Parameters 
---------- df (dataframe): dataframe that contains the information to be converted - + table_visit_diff_string (string): the column that is used to calculate the 'average' difference between a date of interest and the visit start date. For instance, this would allow someone to specify the difference between the observation date and the visit start date. - + bad_records_string (string): the column of the dataframe whose rows will be summed and then converted to the keys of a dictionary. For instance, 'num_bad_records' is often used to show the total number of 'bad' (discrepant) records for a particular site - + Returns ------- num_bad_records (dictionary): has the following structure keys: the HPOs values: the total number of 'bad' (discrepant) records for the particular column of interest - + table_visit_diff_dict (dictionary): has the following structure keys: the HPOs values: the 'average' difference between the two types of dates as specified @@ -262,27 +261,27 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, """ Function is used to create a bar graph for a particular dictionary with information about data quality - + Parameters ---------- info_dict (dictionary): contains information about data quality. The keys for the dictionary will serve as the x-axis labels whereas the values should serve as the 'y-value' for the particular bar - + xlabel (str): label to display across the x-axis - + ylabel (str): label to display across the y-axis - + title (str): title for the graph - + img_name (str): name used to save the image to the local repository - + colour (str): character used to specify the colours of the bars - + total_diff_colour (bool): indicates whether or not the last bar should be coloured red ( as opposed to the rest of the bars on the graph). This is typically used when the ultimate value of the dictionary is of particular importance (e.g. 
representing an 'aggregate' metric - across all of the sites) + across all of the sites) """ bar_list = plt.bar(range(len(info_dict)), list(info_dict.values()), @@ -339,11 +338,11 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, observation_visit_query = """ SELECT DISTINCT -a.*, +a.*, (a.observation_vis_start_diff + a.observation_vis_end_diff + a.observation_vis_start_dt_diff + a.observation_vis_end_dt_diff + a.observation_dt_vis_start_dt_diff + a.observation_dt_vis_end_dt_diff) as total_diff -FROM +FROM ( SELECT - mo.src_hpo_id, COUNT(mo.src_hpo_id) as num_bad_records, + mo.src_hpo_id, COUNT(mo.src_hpo_id) as num_bad_records, IFNULL(ABS(DATE_DIFF(o.observation_date, vo.visit_start_date, DAY)), 0) as observation_vis_start_diff, IFNULL(ABS(DATE_DIFF(o.observation_date, vo.visit_end_date, DAY)), 0) as observation_vis_end_diff, IFNULL(ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), o.observation_date, DAY)), 0) as observation_vis_start_dt_diff, @@ -352,19 +351,19 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, IFNULL(ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)), 0) as observation_dt_vis_end_dt_diff, ( - ABS(DATE_DIFF(o.observation_date, vo.visit_start_date, DAY)) = - ABS(DATE_DIFF(o.observation_date, vo.visit_end_date, DAY)) + ABS(DATE_DIFF(o.observation_date, vo.visit_start_date, DAY)) = + ABS(DATE_DIFF(o.observation_date, vo.visit_end_date, DAY)) AND ABS(DATE_DIFF(o.observation_date, vo.visit_end_date, DAY)) = - ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), o.observation_date, DAY)) + ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), o.observation_date, DAY)) AND ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), o.observation_date, DAY)) = ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), o.observation_date, DAY)) AND - ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), o.observation_date, DAY)) = - ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) + ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), o.observation_date, DAY)) = + ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) AND - ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = + ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = ABS(DATE_DIFF(CAST(o.observation_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)) ) as all_discrepancies_equal @@ -398,7 +397,7 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, OR o.observation_date > vo.visit_end_date) - OR + OR -- problem with datetime (o.observation_datetime < vo.visit_start_datetime OR @@ -409,9 +408,9 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, (o.observation_date < CAST(vo.visit_start_datetime AS DATE) OR o.observation_date > CAST(vo.visit_end_datetime AS DATE)) - + OR - + --problem with the datetime (CAST(o.observation_datetime AS DATE) < CAST(vo.visit_start_datetime AS DATE) OR @@ -487,11 +486,11 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, measurement_visit_query = """ SELECT DISTINCT -a.*, +a.*, (a.measurement_vis_start_diff + a.measurement_vis_end_diff + a.measurement_vis_start_dt_diff + a.measurement_vis_end_dt_diff + a.measurement_dt_vis_start_dt_diff + a.measurement_dt_vis_end_dt_diff) as total_diff -FROM +FROM ( SELECT - mm.src_hpo_id, COUNT(mm.src_hpo_id) as num_bad_records, + 
mm.src_hpo_id, COUNT(mm.src_hpo_id) as num_bad_records, IFNULL(ABS(DATE_DIFF(m.measurement_date, vo.visit_start_date, DAY)), 0) as measurement_vis_start_diff, IFNULL(ABS(DATE_DIFF(m.measurement_date, vo.visit_end_date, DAY)), 0) as measurement_vis_end_diff, IFNULL(ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), m.measurement_date, DAY)), 0) as measurement_vis_start_dt_diff, @@ -500,19 +499,19 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, IFNULL(ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)), 0) as measurement_dt_vis_end_dt_diff, ( - ABS(DATE_DIFF(m.measurement_date, vo.visit_start_date, DAY)) = - ABS(DATE_DIFF(m.measurement_date, vo.visit_end_date, DAY)) + ABS(DATE_DIFF(m.measurement_date, vo.visit_start_date, DAY)) = + ABS(DATE_DIFF(m.measurement_date, vo.visit_end_date, DAY)) AND ABS(DATE_DIFF(m.measurement_date, vo.visit_end_date, DAY)) = - ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), m.measurement_date, DAY)) + ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), m.measurement_date, DAY)) AND ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), m.measurement_date, DAY)) = ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), m.measurement_date, DAY)) AND - ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), m.measurement_date, DAY)) = - ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) + ABS(DATE_DIFF(CAST(vo.visit_end_datetime AS DATE), m.measurement_date, DAY)) = + ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) AND - ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = + ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) = ABS(DATE_DIFF(CAST(m.measurement_datetime AS DATE), CAST(vo.visit_end_datetime AS DATE), DAY)) ) as all_discrepancies_equal @@ -546,7 +545,7 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, OR m.measurement_date > vo.visit_end_date) - OR + OR -- problem with datetime (m.measurement_datetime < vo.visit_start_datetime OR @@ -557,9 +556,9 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, (m.measurement_date < CAST(vo.visit_start_datetime AS DATE) OR m.measurement_date > CAST(vo.visit_end_datetime AS DATE)) - + OR - + --problem with the datetime (CAST(m.measurement_datetime AS DATE) < CAST(vo.visit_start_datetime AS DATE) OR @@ -634,21 +633,21 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour, condition_visit_query = """ SELECT DISTINCT -a.*, +a.*, (a.condition_vis_start_diff + a.condition_vis_start_dt_diff + a.condition_dt_vis_start_dt_diff) as total_diff -FROM +FROM ( SELECT - mco.src_hpo_id, COUNT(mco.src_hpo_id) as num_bad_records, + mco.src_hpo_id, COUNT(mco.src_hpo_id) as num_bad_records, IFNULL(ABS(DATE_DIFF(co.condition_start_date, vo.visit_start_date, DAY)), 0) as condition_vis_start_diff, IFNULL(ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), co.condition_start_date, DAY)), 0) as condition_vis_start_dt_diff, IFNULL(ABS(DATE_DIFF(CAST(co.condition_start_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)), 0) as condition_dt_vis_start_dt_diff, - + ( - ABS(DATE_DIFF(co.condition_start_date, vo.visit_start_date, DAY)) = - ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), co.condition_start_date, DAY)) + ABS(DATE_DIFF(co.condition_start_date, vo.visit_start_date, DAY)) = + 
@@ -752,21 +751,21 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour,
 
 drug_visit_query = """
 SELECT DISTINCT
-a.*, 
+a.*,
 (a.drug_vis_start_diff + a.drug_vis_start_dt_diff + a.drug_dt_vis_start_dt_diff) as total_diff
-FROM 
+FROM
 (
 SELECT
-mde.src_hpo_id, COUNT(mde.src_hpo_id) as num_bad_records, 
+mde.src_hpo_id, COUNT(mde.src_hpo_id) as num_bad_records,
 IFNULL(ABS(DATE_DIFF(de.drug_exposure_start_date, vo.visit_start_date, DAY)), 0) as drug_vis_start_diff,
 IFNULL(ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), de.drug_exposure_start_date, DAY)), 0) as drug_vis_start_dt_diff,
 IFNULL(ABS(DATE_DIFF(CAST(de.drug_exposure_start_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)), 0) as drug_dt_vis_start_dt_diff,
-
+
 (
-ABS(DATE_DIFF(de.drug_exposure_start_date, vo.visit_start_date, DAY)) = 
-ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), de.drug_exposure_start_date, DAY)) 
+ABS(DATE_DIFF(de.drug_exposure_start_date, vo.visit_start_date, DAY)) =
+ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), de.drug_exposure_start_date, DAY))
 AND
 ABS(DATE_DIFF(CAST(vo.visit_start_datetime AS DATE), de.drug_exposure_start_date, DAY)) =
-ABS(DATE_DIFF(CAST(de.drug_exposure_start_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY)) 
+ABS(DATE_DIFF(CAST(de.drug_exposure_start_datetime AS DATE), CAST(vo.visit_start_datetime AS DATE), DAY))
 ) as all_discrepancies_equal
 
 FROM
@@ -797,16 +796,16 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour,
 -- problem with procedure date
 (de.drug_exposure_start_date < vo.visit_start_date)
 
-OR 
+OR
 
 -- problem with datetime
 (de.drug_exposure_start_datetime < vo.visit_start_datetime)
 
 OR
 
 -- problem with the datetime (extracting date for comparison)
 (de.drug_exposure_start_date < CAST(vo.visit_start_datetime AS DATE))
-
+
 OR
-
+
 --problem with the datetime
 (CAST(de.drug_exposure_start_datetime AS DATE) < CAST(vo.visit_start_datetime AS DATE))
 )
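In all four queries, all_discrepancies_equal chains pairwise equalities (a = b AND b = c AND ...) because SQL has no single "all equal" predicate; the flag is TRUE exactly when every date/datetime representation disagrees with the visit by the same number of days, which points at one systematically shifted field rather than several independent errors. The same check in Python, for intuition (a sketch, not code from the notebook):

def all_discrepancies_equal(diffs):
    """Return True when every pairwise day-difference has the same value.

    Mirrors the chained `a = b AND b = c ...` predicate in the SQL above.
    """
    return len(set(diffs)) <= 1

all_discrepancies_equal([3, 3, 3])   # True: a uniform 3-day shift
all_discrepancies_equal([3, 0, 14])  # False: inconsistent discrepancies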
diff --git a/data_steward/analytics/cdr_ops/systematic_scripts/ehr_data_quality_dashboard_testing.py b/data_steward/analytics/cdr_ops/systematic_scripts/ehr_data_quality_dashboard_testing.py
index fb691788c8..469889ef28 100644
--- a/data_steward/analytics/cdr_ops/systematic_scripts/ehr_data_quality_dashboard_testing.py
+++ b/data_steward/analytics/cdr_ops/systematic_scripts/ehr_data_quality_dashboard_testing.py
@@ -21,7 +21,6 @@
 # %load_ext google.cloud.bigquery
 
 # +
-import bq_utils
 import utils.bq
 from notebooks import parameters
@@ -66,7 +65,7 @@
 JOIN
 `{}._mapping_measurement` mm
 ON
-mm.measurement_id = m.measurement_id 
+mm.measurement_id = m.measurement_id
 JOIN
 `{}.concept` c
 ON
@@ -108,7 +107,7 @@
 JOIN
 `{}._mapping_measurement` mm
 ON
-mm.measurement_id = m.measurement_id 
+mm.measurement_id = m.measurement_id
 JOIN
 `{}.concept` c
 ON
@@ -196,7 +195,7 @@
 JOIN
 `{}._mapping_measurement` mm
 ON
-  mm.measurement_id = m.measurement_id 
+  mm.measurement_id = m.measurement_id
 JOIN
 `{}.concept` c
 ON
@@ -238,7 +237,7 @@
 JOIN
 `{}._mapping_measurement` mm
 ON
-  mm.measurement_id = m.measurement_id 
+  mm.measurement_id = m.measurement_id
 JOIN
 `{}.concept` c
 ON
@@ -305,28 +304,28 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, color,
     """
     Function is used to create a bar graph for a particular dictionary with
     information about data quality
-    
+
     Parameters
     ----------
     info_dict (dictionary): contains information about data quality. The keys
         for the dictionary will serve as the x-axis labels whereas the values
         should serve as the 'y-value' for the particular bar
-    
+
     xlabel (str): label to display across the x-axis
-    
+
     ylabel (str): label to display across the y-axis
-    
+
     title (str): title for the graph
-    
+
     img_name (str): image used to save the image to the local repository
-    
+
     color (str): character used to specify the colours of the bars
-    
+
     total_diff_color (bool): indicates whether or not the last bar should be coloured red (
         as opposed to the rest of the bars on the graph). This is typically used when
         the ultimate value of the dictionary is of particular important (e.g.
         representing an 'aggregate' metric across all of the sites)
-    
+
     turnoff_x (bool): used to disable the x-axis labels (for each of the bars). This
         is typically used when there are so many x-axis labels that they overlap
         and obscure legibility
     """
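The docstring above pins down the shape of create_graphs, including the bar_list = plt.bar(...) call visible in the earlier hunk. A minimal implementation consistent with that docstring; the body below is an illustration, not the notebook's actual code:

import matplotlib.pyplot as plt

def create_graphs_sketch(info_dict, xlabel, ylabel, title, img_name, color,
                         total_diff_color, turnoff_x):
    """Bar graph helper consistent with the docstring above (illustrative)."""
    bar_list = plt.bar(range(len(info_dict)), list(info_dict.values()),
                       align='center', color=color)
    if total_diff_color:
        bar_list[len(info_dict) - 1].set_color('r')  # flag the aggregate bar
    if turnoff_x:
        plt.xticks([])  # many labels would overlap and obscure legibility
    else:
        plt.xticks(range(len(info_dict)), list(info_dict.keys()), rotation=90)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.savefig(img_name, bbox_inches='tight')
    plt.show()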
@@ -357,25 +356,25 @@ def create_dicts_w_info(df, x_label, column_label):
     """
     This function is used to create a dictionary that can be easily converted to
     a graphical representation based on the values for a particular dataframe
-    
+
     Parameters
     ----------
     df (dataframe): dataframe that contains the information to be converted
-    
+
     x_label (string): the column of the dataframe whose rows will then be converted
         to they keys of a dictionary
-    
+
     column_label (string): the column that contains the data quality metric being
         investigated
-    
+
     Returns
     -------
     data_qual_info (dictionary): has the following structure
-    
+
         keys: the column for a particular dataframe that represents the elements
            that whose data quality is being compared (e.g. HPOs, different
            measurement/unit combinations)
-    
+
         values: the data quality metric being compared
     """
     rows = df[x_label].unique().tolist()
diff --git a/data_steward/analytics/cdr_ops/systematic_scripts/notes_volume_distribution.py b/data_steward/analytics/cdr_ops/systematic_scripts/notes_volume_distribution.py
index 2a6374cead..d2c7662837 100644
--- a/data_steward/analytics/cdr_ops/systematic_scripts/notes_volume_distribution.py
+++ b/data_steward/analytics/cdr_ops/systematic_scripts/notes_volume_distribution.py
@@ -21,7 +21,6 @@
 # %load_ext google.cloud.bigquery
 
 # +
-import bq_utils
 import utils.bq
 from notebooks import parameters
@@ -68,11 +67,11 @@ def create_dicts_w_info(df, column_label):
     """
     This function is used to create a dictionary that can be easily converted to
     a graphical representation based on the values for a particular dataframe
-    
+
     Parameters
     ----------
     df (dataframe): dataframe that contains the information to be converted
-    
+
     column_label (string): the column of the dataframe whose rows will then be
         converted to the keys of the dictionary
     """
@@ -95,23 +94,23 @@ def create_graphs(info_dict, xlabel, ylabel, title, img_name, colour,
     """
     Function is used to create a bar graph for a particular dictionary with
     information about data quality
-    
+
     Parameters
     ----------
     info_dict (dictionary): contains information about data quality. The keys
         for the dictionary will serve as the x-axis labels whereas the values
         should serve as the 'y-value' for the particular bar
-    
+
     xlabel (str): label to display across the x-axis
-    
+
     ylabel (str): label to display across the y-axis
-    
+
     title (str): title for the graph
-    
+
     img_name (str): image used to save the image to the local repository
-    
+
     colour (str): character used to specify the colours of the bars
-    
+
     total_diff_colour (bool): indicates whether or not the last bar should be coloured red (
         as opposed to the rest of the bars on the graph). This is typically used when
         the ultimate value of the dictionary is of particular important (e.g.
         representing an 'aggregate' metric
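Both notebooks carry a create_dicts_w_info variant built on the same core transformation: one dataframe column supplies the dictionary keys, another supplies the metric values, starting from the rows = df[x_label].unique().tolist() line shown above. A sketch of the two-argument form, assuming one row per key as in the per-HPO dataframes (not the notebook's actual body):

import pandas as pd

def create_dicts_w_info_sketch(df, x_label, column_label):
    """Map each value of df[x_label] to its metric in df[column_label]."""
    rows = df[x_label].unique().tolist()
    return {row: df.loc[df[x_label] == row, column_label].iloc[0]
            for row in rows}

hpo_df = pd.DataFrame({'src_hpo_id': ['hpo_a', 'hpo_b'],
                       'total_diff': [12, 0]})
data_qual_info = create_dicts_w_info_sketch(hpo_df, 'src_hpo_id', 'total_diff')
# {'hpo_a': 12, 'hpo_b': 0}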
diff --git a/data_steward/validation/metrics/required_labs.py b/data_steward/validation/metrics/required_labs.py
index 2bcdd358f4..3b3b50ec30 100644
--- a/data_steward/validation/metrics/required_labs.py
+++ b/data_steward/validation/metrics/required_labs.py
@@ -7,9 +7,9 @@
 # Project imports
 import app_identity
-import bq_utils
 import resources
 import common
+import bq_utils
 from constants import bq_utils as bq_consts
 from gcloud.bq import BigQueryClient
 from validation.metrics.required_labs_sql import (IDENTIFY_LABS_QUERY,
@@ -165,7 +165,7 @@ def get_lab_concept_summary_query(client, hpo_id):
     Get the query that checks if the HPO site has submitted the required labs
     :param client: a BigQueryClient
     :param hpo_id: Identifies the HPO site
-    :return: 
+    :return:
     """
     dataset_id = common.BIGQUERY_DATASET_ID
     hpo_measurement_table = resources.get_table_id(common.MEASUREMENT,
diff --git a/tests/integration_tests/data_steward/retraction/retract_data_gcs_test.py b/tests/integration_tests/data_steward/retraction/retract_data_gcs_test.py
index 294e1a4c0d..16998d83da 100644
--- a/tests/integration_tests/data_steward/retraction/retract_data_gcs_test.py
+++ b/tests/integration_tests/data_steward/retraction/retract_data_gcs_test.py
@@ -10,7 +10,6 @@
 # Project imports
 import app_identity
-import bq_utils
 from common import BIGQUERY_DATASET_ID
 from tests import test_util
 from retraction import retract_data_gcs as rd
diff --git a/tests/integration_tests/data_steward/validation/export_test.py b/tests/integration_tests/data_steward/validation/export_test.py
index cb198a860a..7586cf559b 100644
--- a/tests/integration_tests/data_steward/validation/export_test.py
+++ b/tests/integration_tests/data_steward/validation/export_test.py
@@ -7,7 +7,6 @@
 # Project imports
 import app_identity
-import bq_utils
 import common
 from gcloud.gcs import StorageClient
 from gcloud.bq import BigQueryClient
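Aside from dropping the now-unused bq_utils imports (and, in required_labs.py, moving the surviving import after common), the bulk of this change is mechanical trailing-whitespace removal. A sweep like the sketch below would flag any stragglers before review; the root directory here is illustrative:

import pathlib

# Report every line carrying trailing whitespace under a directory.
for path in pathlib.Path('data_steward').rglob('*.py'):
    for lineno, line in enumerate(path.read_text().splitlines(), start=1):
        if line != line.rstrip():
            print(f'{path}:{lineno}: trailing whitespace')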