diff --git a/data_steward/analytics/cdr_ops/clean_rdr_export_qc.py b/data_steward/analytics/cdr_ops/clean_rdr_export_qc.py
index 9d18aa5cda..14248b11d9 100644
--- a/data_steward/analytics/cdr_ops/clean_rdr_export_qc.py
+++ b/data_steward/analytics/cdr_ops/clean_rdr_export_qc.py
@@ -31,15 +31,232 @@
from gcloud.bq import BigQueryClient
from analytics.cdr_ops.notebook_utils import execute, IMPERSONATION_SCOPES, render_message
-# # Table comparison
-# The export should generally contain the same tables from month to month.
-# Tables found only in the old or the new export are listed below.
-
impersonation_creds = auth.get_impersonation_credentials(
run_as, target_scopes=IMPERSONATION_SCOPES)
client = BigQueryClient(project_id, credentials=impersonation_creds)
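+# Lowercase observation_source_value codes of text-type survey questions, which may legitimately hold numbers in value_as_string.
+# Built by querying the REDCap surveys; to update it, run the query in the comments of DC3407 and mirror the change in raw_rdr_export_qc.py, which keeps its own copy of this list.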
+expected_strings = ['cidi5_20', 'cidi5_24', 'cidi5_28', 'cidi5_31', 'mhqukb_48_age',
+ 'mhqukb_50_number', 'mhqukb_51_number', 'mhqukb_52_number',
+ 'mhqukb_53_number', 'record_id', 'helpmewithconsent_name',
+ 'other_concerns', 'other_reasons', 'resultsconsent_emailmecopy',
+ 'resultsconsent_signaturedate', 'consentpii_helpwithconsentsignature',
+ 'extraconsent_signature_type', 'extraconsent_todaysdate',
+ 'piiaddress_streetaddress', 'piiaddress_streetaddress2',
+ 'piibirthinformation_birthdate', 'piicontactinformation_phone',
+ 'piiname_first', 'piiname_last', 'piiname_middle',
+ 'streetaddress_piicity', 'streetaddress_piizip', 'basics_11a_cope_a_33',
+ 'basics_xx', 'basics_xx20', 'cdc_covid_19_7_xx22_date', 'cope_a_126',
+ 'cope_a_160', 'cope_a_85', 'copect_50_xx19_cope_a_152',
+ 'copect_50_xx19_cope_a_198', 'copect_50_xx19_cope_a_57',
+ 'cu_covid_cope_a_204', 'eds_follow_up_1_xx', 'ipaq_1_cope_a_24',
+ 'ipaq_2_cope_a_160', 'ipaq_2_cope_a_85', 'ipaq_3_cope_a_24',
+ 'ipaq_4_cope_a_160', 'ipaq_4_cope_a_85', 'ipaq_5_cope_a_24',
+ 'ipaq_6_cope_a_160', 'ipaq_6_cope_a_85', 'lifestyle_2_xx12_cope_a_152',
+ 'lifestyle_2_xx12_cope_a_198', 'lifestyle_2_xx12_cope_a_57',
+ 'tsu_ds5_13_xx42_cope_a_226', 'cdc_covid_19_7_xx23_other_cope_a_204',
+ 'cdc_covid_19_n_a2', 'cdc_covid_19_n_a4', 'cdc_covid_19_n_a8',
+ 'cope_aou_xx_2_a', 'dmfs_29a', 'msds_17_c',
+ 'nhs_covid_fhc17b_cope_a_226', 'ehrconsentpii_helpwithconsentsignature',
+ 'ehrconsentpii_todaysdate', 'ehrconsentpii_todaysdateilhippawitness',
+ 'sensitivetype2_domesticviolence', 'sensitivetype2_genetictesting',
+ 'sensitivetype2_hivaids', 'sensitivetype2_mentalhealth',
+ 'sensitivetype2_substanceuse', 'signature_type', 'cidi5_15',
+ 'mhqukb_25_number', 'mhqukb_26_age', 'mhqukb_28_age', 'ss_2_age',
+ 'ss_3_age_1', 'ss_3_age_2', 'ss_3_number',
+ 'english_exploring_the_mind_consent_form', 'etm_help_name',
+ 'cdc_covid_xx_a_date1', 'cdc_covid_xx_a_date2',
+ 'cdc_covid_xx_b_firstdose_other', 'cdc_covid_xx_b_seconddose_other',
+ 'cdc_covid_xx_symptom_cope_350',
+ 'cdc_covid_xx_symptom_seconddose_cope_350', 'dmfs_29_seconddose_other',
+ 'othercancer_daughterfreetextbox', 'othercancer_fatherfreetextbox',
+ 'othercancer_grandparentfreetextbox', 'othercancer_motherfreetextbox',
+ 'othercancer_siblingfreetextbox', 'othercancer_sonfreetextbox',
+ 'othercondition_daughterfreetextbox', 'othercondition_fatherfreetextbox',
+ 'othercondition_grandparentfreetextbox',
+ 'othercondition_motherfreetextbox', 'othercondition_siblingfreetextbox',
+ 'othercondition_sonfreetextbox', 'cdc_covid_xx_b_other',
+ 'otherdelayedmedicalcare_freetext',
+ 'attemptquitsmoking_completelyquitage', 'otherspecify_otherdrugstextbox',
+ 'smoking_averagedailycigarettenumber',
+ 'smoking_currentdailycigarettenumber',
+ 'smoking_dailysmokestartingagenumber', 'smoking_numberofyearsnumber',
+ 'cdc_covid_xx_a_date10', 'cdc_covid_xx_a_date11',
+ 'cdc_covid_xx_a_date12', 'cdc_covid_xx_a_date13',
+ 'cdc_covid_xx_a_date14', 'cdc_covid_xx_a_date15',
+ 'cdc_covid_xx_a_date16', 'cdc_covid_xx_a_date17', 'cdc_covid_xx_a_date3',
+ 'cdc_covid_xx_a_date4', 'cdc_covid_xx_a_date5', 'cdc_covid_xx_a_date6',
+ 'cdc_covid_xx_a_date7', 'cdc_covid_xx_a_date8', 'cdc_covid_xx_a_date9',
+ 'cdc_covid_xx_b_dose10_other', 'cdc_covid_xx_b_dose11_other',
+ 'cdc_covid_xx_b_dose12_other', 'cdc_covid_xx_b_dose13_other',
+ 'cdc_covid_xx_b_dose14_other', 'cdc_covid_xx_b_dose15_other',
+ 'cdc_covid_xx_b_dose16_other', 'cdc_covid_xx_b_dose17_other',
+ 'cdc_covid_xx_b_dose3_other', 'cdc_covid_xx_b_dose4_other',
+ 'cdc_covid_xx_b_dose5_other', 'cdc_covid_xx_b_dose6_other',
+ 'cdc_covid_xx_b_dose7_other', 'cdc_covid_xx_b_dose8_other',
+ 'cdc_covid_xx_b_dose9_other', 'cdc_covid_xx_symptom_cope_350_dose10',
+ 'cdc_covid_xx_symptom_cope_350_dose11',
+ 'cdc_covid_xx_symptom_cope_350_dose12',
+ 'cdc_covid_xx_symptom_cope_350_dose13',
+ 'cdc_covid_xx_symptom_cope_350_dose14',
+ 'cdc_covid_xx_symptom_cope_350_dose15',
+ 'cdc_covid_xx_symptom_cope_350_dose16',
+ 'cdc_covid_xx_symptom_cope_350_dose17',
+ 'cdc_covid_xx_symptom_cope_350_dose3',
+ 'cdc_covid_xx_symptom_cope_350_dose4',
+ 'cdc_covid_xx_symptom_cope_350_dose5',
+ 'cdc_covid_xx_symptom_cope_350_dose6',
+ 'cdc_covid_xx_symptom_cope_350_dose7',
+ 'cdc_covid_xx_symptom_cope_350_dose8',
+ 'cdc_covid_xx_symptom_cope_350_dose9', 'cdc_covid_xx_type_dose10_other',
+ 'cdc_covid_xx_type_dose11_other', 'cdc_covid_xx_type_dose12_other',
+ 'cdc_covid_xx_type_dose13_other', 'cdc_covid_xx_type_dose14_other',
+ 'cdc_covid_xx_type_dose15_other', 'cdc_covid_xx_type_dose16_other',
+ 'cdc_covid_xx_type_dose17_other', 'cdc_covid_xx_type_dose3_other',
+ 'cdc_covid_xx_type_dose4_other', 'cdc_covid_xx_type_dose5_other',
+ 'cdc_covid_xx_type_dose6_other', 'cdc_covid_xx_type_dose7_other',
+ 'cdc_covid_xx_type_dose8_other', 'cdc_covid_xx_type_dose9_other',
+ 'dmfs_29_additionaldose_other',
+ 'organtransplant_bloodvesseltransplantdate',
+ 'organtransplant_bonetransplantdate',
+ 'organtransplant_corneatransplantdate',
+ 'organtransplant_hearttransplantdate',
+ 'organtransplant_intestinetransplantdate',
+ 'organtransplant_kidneytransplantdate',
+ 'organtransplant_livertransplantdate',
+ 'organtransplant_lungtransplantdate',
+ 'organtransplant_otherorgantransplantdate',
+ 'organtransplant_othertissuetransplantdate',
+ 'organtransplant_pancreastransplantdate',
+ 'organtransplant_skintransplantdate',
+ 'organtransplant_valvetransplantdate', 'otherorgan_freetextbox',
+ 'othertissue_freetextbox',
+ 'outsidetravel6month_outsidetravel6monthhowlong',
+ 'outsidetravel6month_outsidetravel6monthwheretraveled',
+ 'overallhealth_hysterectomyhistoryage',
+ 'overallhealthovaryremovalhistoryage',
+ 'otherarthritis_daughterfreetextbox', 'otherarthritis_fatherfreetextbox',
+ 'otherarthritis_freetextbox', 'otherarthritis_grandparentfreetextbox',
+ 'otherarthritis_motherfreetextbox', 'otherarthritis_siblingfreetextbox',
+ 'otherarthritis_sonfreetextbox',
+ 'otherbonejointmuscle_daughterfreetextbox',
+ 'otherbonejointmuscle_fatherfreetextbox',
+ 'otherbonejointmuscle_freetextbox',
+ 'otherbonejointmuscle_grandparentfreetextbox',
+ 'otherbonejointmuscle_motherfreetextbox',
+ 'otherbonejointmuscle_siblingfreetextbox',
+ 'otherbonejointmuscle_sonfreetextbox',
+ 'otherbrainnervoussystem_daughterfreetextbox',
+ 'otherbrainnervoussystem_fatherfreetextbox',
+ 'otherbrainnervoussystem_freetextbox',
+ 'otherbrainnervoussystem_grandparentfreetextbox',
+ 'otherbrainnervoussystem_motherfreetextbox',
+ 'otherbrainnervoussystem_siblingfreetextbox',
+ 'otherbrainnervoussystem_sonfreetextbox', 'othercancer_freetextbox',
+ 'otherdiabetes_daughterfreetextbox', 'otherdiabetes_fatherfreetextbox',
+ 'otherdiabetes_freetextbox', 'otherdiabetes_grandparentfreetextbox',
+ 'otherdiabetes_motherfreetextbox', 'otherdiabetes_siblingfreetextbox',
+ 'otherdiabetes_sonfreetextbox', 'otherdiagnosis_daughterfreetextbox',
+ 'otherdiagnosis_fatherfreetextbox', 'otherdiagnosis_freetextbox',
+ 'otherdiagnosis_grandparentfreetextbox',
+ 'otherdiagnosis_motherfreetextbox', 'otherdiagnosis_siblingfreetextbox',
+ 'otherdiagnosis_sonfreetextbox',
+ 'otherdigestivecondition_daughterfreetextbox',
+ 'otherdigestivecondition_fatherfreetextbox',
+ 'otherdigestivecondition_freetextbox',
+ 'otherdigestivecondition_grandparentfreetextbox',
+ 'otherdigestivecondition_motherfreetextbox',
+ 'otherdigestivecondition_siblingfreetextbox',
+ 'otherdigestivecondition_sonfreetextbox',
+ 'otherhearingeye_daughterfreetextbox',
+ 'otherhearingeye_fatherfreetextbox', 'otherhearingeye_freetextbox',
+ 'otherhearingeye_grandparentfreetextbox',
+ 'otherhearingeye_motherfreetextbox',
+ 'otherhearingeye_siblingfreetextbox', 'otherhearingeye_sonfreetextbox',
+ 'otherheartorbloodcondition_daughterfreetextbox',
+ 'otherheartorbloodcondition_fatherfreetextbox',
+ 'otherheartorbloodcondition_freetextbox',
+ 'otherheartorbloodcondition_grandparentfreetextbox',
+ 'otherheartorbloodcondition_motherfreetextbox',
+ 'otherheartorbloodcondition_siblingfreetextbox',
+ 'otherheartorbloodcondition_sonfreetextbox',
+ 'otherhormoneendocrine_daughterfreetextbox',
+ 'otherhormoneendocrine_fatherfreetextbox',
+ 'otherhormoneendocrine_freetextbox',
+ 'otherhormoneendocrine_grandparentfreetextbox',
+ 'otherhormoneendocrine_motherfreetextbox',
+ 'otherhormoneendocrine_siblingfreetextbox',
+ 'otherhormoneendocrine_sonfreetextbox',
+ 'otherinfectiousdisease_freetextbox',
+ 'otherkidneycondition_daughterfreetextbox',
+ 'otherkidneycondition_fatherfreetextbox',
+ 'otherkidneycondition_freetextbox',
+ 'otherkidneycondition_grandparentfreetextbox',
+ 'otherkidneycondition_motherfreetextbox',
+ 'otherkidneycondition_siblingfreetextbox',
+ 'otherkidneycondition_sonfreetextbox',
+ 'othermentalhealthsubstanceuse_daughterfreetextbox',
+ 'othermentalhealthsubstanceuse_fatherfreetextbox',
+ 'othermentalhealthsubstanceuse_freetextbox',
+ 'othermentalhealthsubstanceuse_grandparentfreetextb',
+ 'othermentalhealthsubstanceuse_motherfreetextbox',
+ 'othermentalhealthsubstanceuse_siblingfreetextbox',
+ 'othermentalhealthsubstanceuse_sonfreetextbox',
+ 'otherrespiratory_daughterfreetextbox',
+ 'otherrespiratory_fatherfreetextbox', 'otherrespiratory_freetextbox',
+ 'otherrespiratory_grandparentfreetextbox',
+ 'otherrespiratory_motherfreetextbox',
+ 'otherrespiratory_siblingfreetextbox', 'otherrespiratory_sonfreetextbox',
+ 'otherthyroid_daughterfreetextbox', 'otherthyroid_fatherfreetextbox',
+ 'otherthyroid_freetextbox', 'otherthyroid_grandparentfreetextbox',
+ 'otherthyroid_motherfreetextbox', 'otherthyroid_siblingfreetextbox',
+ 'otherthyroid_sonfreetextbox', 'self_reported_height_cm',
+ 'self_reported_height_ft', 'self_reported_height_in',
+ 'self_reported_weight_kg', 'self_reported_weight_pounds',
+ 'sdoh_eds_follow_up_1_xx', 'urs_8c', 'aian_tribe',
+ 'aiannoneofthesedescribeme_aianfreetext',
+ 'blacknoneofthesedescribeme_blackfreetext',
+ 'employmentworkaddress_addresslineone',
+ 'employmentworkaddress_addresslinetwo', 'employmentworkaddress_city',
+ 'employmentworkaddress_country', 'employmentworkaddress_zipcode',
+ 'hispanicnoneofthesedescribeme_hispanicfreetext',
+ 'livingsituation_howmanypeople',
+ 'livingsituation_livingsituationfreetext',
+ 'livingsituation_peopleunder18',
+ 'menanoneofthesedescribeme_menafreetext',
+ 'nhpinoneofthesedescribeme_nhpifreetext',
+ 'noneofthesedescribeme_asianfreetext', 'otherhealthplan_freetext',
+ 'persononeaddress_persononeaddresscity',
+ 'persononeaddress_persononeaddresszipcode',
+ 'secondarycontactinfo_persononeaddressone',
+ 'secondarycontactinfo_persononeaddresstwo',
+ 'secondarycontactinfo_persononeemail',
+ 'secondarycontactinfo_persononefirstname',
+ 'secondarycontactinfo_persononelastname',
+ 'secondarycontactinfo_persononemiddleinitial',
+ 'secondarycontactinfo_persononetelephone',
+ 'secondarycontactinfo_secondcontactsaddressone',
+ 'secondarycontactinfo_secondcontactsaddresstwo',
+ 'secondarycontactinfo_secondcontactsemail',
+ 'secondarycontactinfo_secondcontactsfirstname',
+ 'secondarycontactinfo_secondcontactslastname',
+ 'secondarycontactinfo_secondcontactsmiddleinitial',
+ 'secondarycontactinfo_secondcontactsnumber',
+ 'secondcontactsaddress_secondcontactcity',
+ 'secondcontactsaddress_secondcontactzipcode',
+ 'sexatbirthnoneofthese_sexatbirthtextbox',
+ 'socialsecurity_socialsecuritynumber',
+ 'somethingelse_sexualitysomethingelsetextbox',
+ 'specifiedgender_specifiedgendertextbox', 'thebasics_countryborntextbox',
+ 'whatraceethnicity_raceethnicitynoneofthese',
+ 'whitenoneofthesedescribeme_whitefreetext', 'timeofday',
+ 'wearconsent_todaysdate']
+
+# # Table comparison
+# The export should generally contain the same tables from month to month.
+# Tables found only in the old or the new export are listed below.
+
tpl = JINJA_ENV.from_string('''
SELECT
COALESCE(curr.table_id, prev.table_id) AS table_id
@@ -187,20 +404,34 @@
query = tpl.render(new_rdr=new_rdr, project_id=project_id)
execute(client, query)
-# # Check if numeric data in value_as_string
+# # Check numeric data in value_as_string
# Some numeric data is expected in value_as_string. For example, zip codes or other contact specific information.
-
+#
+# **If the check fails, manually review the results.**
+# False positives are possible. As a first step, run the query in the comments of DC3407: it lists any new text-type survey questions, which should then be added to the list `expected_strings`.
+# +
+# Count, per observation_source_value, the rows whose value_as_string parses as INT64,
+# restricted to value_source_concept_id = 0 and excluding allow-listed / contact-style question codes.
tpl = JINJA_ENV.from_string("""
SELECT
observation_source_value
,COUNT(1) AS n
FROM `{{project_id}}.{{new_rdr}}.observation`
WHERE SAFE_CAST(value_as_string AS INT64) IS NOT NULL
+AND value_source_concept_id = 0
+AND LOWER(observation_source_value) NOT IN UNNEST ({{expected_strings}})
+AND NOT REGEXP_CONTAINS(LOWER(observation_source_value), 'snap|signature|address|email|number|cohortgroup')
GROUP BY 1
ORDER BY 2 DESC
""")
-query = tpl.render(new_rdr=new_rdr, project_id=project_id)
-execute(client, query)
+# NOTE(review): expected_strings is interpolated as a Python list, presumably rendering as its repr
+# (['a', 'b', ...]) so that UNNEST receives an array literal; this relies on JINJA_ENV not escaping quotes - confirm.
+query = tpl.render(new_rdr=new_rdr, project_id=project_id, expected_strings=expected_strings)
+df = execute(client, query)
+
+success_msg = 'All records with a number in value_as_string are expected to be text.'
+failure_msg = 'Some records that have a number value_as_string might not be expected. See description.'
+
+render_message(df,
+ success_msg,
+ failure_msg)
+# -
# # All COPE `questionnaire_response_id`s are in COPE version map
# Any `questionnaire_response_id`s missing from the map will be listed below.
@@ -1007,4 +1238,3 @@
success_msg,
failure_msg,
failure_msg_args={'code_count': len(df)})
-# -
\ No newline at end of file
diff --git a/data_steward/analytics/cdr_ops/raw_rdr_export_qc.py b/data_steward/analytics/cdr_ops/raw_rdr_export_qc.py
index 3504dae56f..2553091687 100644
--- a/data_steward/analytics/cdr_ops/raw_rdr_export_qc.py
+++ b/data_steward/analytics/cdr_ops/raw_rdr_export_qc.py
@@ -54,6 +54,224 @@
'wear_consent_ptsc'
]
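+# Lowercase observation_source_value codes of text-type survey questions, which may legitimately hold numbers in value_as_string.
+# Built by querying the REDCap surveys; to update it, run the query in the comments of DC3407 and mirror the change in clean_rdr_export_qc.py, which keeps its own copy of this list.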
+expected_strings = ['cidi5_20', 'cidi5_24', 'cidi5_28', 'cidi5_31', 'mhqukb_48_age',
+ 'mhqukb_50_number', 'mhqukb_51_number', 'mhqukb_52_number',
+ 'mhqukb_53_number', 'record_id', 'helpmewithconsent_name',
+ 'other_concerns', 'other_reasons', 'resultsconsent_emailmecopy',
+ 'resultsconsent_signaturedate', 'consentpii_helpwithconsentsignature',
+ 'extraconsent_signature_type', 'extraconsent_todaysdate',
+ 'piiaddress_streetaddress', 'piiaddress_streetaddress2',
+ 'piibirthinformation_birthdate', 'piicontactinformation_phone',
+ 'piiname_first', 'piiname_last', 'piiname_middle',
+ 'streetaddress_piicity', 'streetaddress_piizip', 'basics_11a_cope_a_33',
+ 'basics_xx', 'basics_xx20', 'cdc_covid_19_7_xx22_date', 'cope_a_126',
+ 'cope_a_160', 'cope_a_85', 'copect_50_xx19_cope_a_152',
+ 'copect_50_xx19_cope_a_198', 'copect_50_xx19_cope_a_57',
+ 'cu_covid_cope_a_204', 'eds_follow_up_1_xx', 'ipaq_1_cope_a_24',
+ 'ipaq_2_cope_a_160', 'ipaq_2_cope_a_85', 'ipaq_3_cope_a_24',
+ 'ipaq_4_cope_a_160', 'ipaq_4_cope_a_85', 'ipaq_5_cope_a_24',
+ 'ipaq_6_cope_a_160', 'ipaq_6_cope_a_85', 'lifestyle_2_xx12_cope_a_152',
+ 'lifestyle_2_xx12_cope_a_198', 'lifestyle_2_xx12_cope_a_57',
+ 'tsu_ds5_13_xx42_cope_a_226', 'cdc_covid_19_7_xx23_other_cope_a_204',
+ 'cdc_covid_19_n_a2', 'cdc_covid_19_n_a4', 'cdc_covid_19_n_a8',
+ 'cope_aou_xx_2_a', 'dmfs_29a', 'msds_17_c',
+ 'nhs_covid_fhc17b_cope_a_226', 'ehrconsentpii_helpwithconsentsignature',
+ 'ehrconsentpii_todaysdate', 'ehrconsentpii_todaysdateilhippawitness',
+ 'sensitivetype2_domesticviolence', 'sensitivetype2_genetictesting',
+ 'sensitivetype2_hivaids', 'sensitivetype2_mentalhealth',
+ 'sensitivetype2_substanceuse', 'signature_type', 'cidi5_15',
+ 'mhqukb_25_number', 'mhqukb_26_age', 'mhqukb_28_age', 'ss_2_age',
+ 'ss_3_age_1', 'ss_3_age_2', 'ss_3_number',
+ 'english_exploring_the_mind_consent_form', 'etm_help_name',
+ 'cdc_covid_xx_a_date1', 'cdc_covid_xx_a_date2',
+ 'cdc_covid_xx_b_firstdose_other', 'cdc_covid_xx_b_seconddose_other',
+ 'cdc_covid_xx_symptom_cope_350',
+ 'cdc_covid_xx_symptom_seconddose_cope_350', 'dmfs_29_seconddose_other',
+ 'othercancer_daughterfreetextbox', 'othercancer_fatherfreetextbox',
+ 'othercancer_grandparentfreetextbox', 'othercancer_motherfreetextbox',
+ 'othercancer_siblingfreetextbox', 'othercancer_sonfreetextbox',
+ 'othercondition_daughterfreetextbox', 'othercondition_fatherfreetextbox',
+ 'othercondition_grandparentfreetextbox',
+ 'othercondition_motherfreetextbox', 'othercondition_siblingfreetextbox',
+ 'othercondition_sonfreetextbox', 'cdc_covid_xx_b_other',
+ 'otherdelayedmedicalcare_freetext',
+ 'attemptquitsmoking_completelyquitage', 'otherspecify_otherdrugstextbox',
+ 'smoking_averagedailycigarettenumber',
+ 'smoking_currentdailycigarettenumber',
+ 'smoking_dailysmokestartingagenumber', 'smoking_numberofyearsnumber',
+ 'cdc_covid_xx_a_date10', 'cdc_covid_xx_a_date11',
+ 'cdc_covid_xx_a_date12', 'cdc_covid_xx_a_date13',
+ 'cdc_covid_xx_a_date14', 'cdc_covid_xx_a_date15',
+ 'cdc_covid_xx_a_date16', 'cdc_covid_xx_a_date17', 'cdc_covid_xx_a_date3',
+ 'cdc_covid_xx_a_date4', 'cdc_covid_xx_a_date5', 'cdc_covid_xx_a_date6',
+ 'cdc_covid_xx_a_date7', 'cdc_covid_xx_a_date8', 'cdc_covid_xx_a_date9',
+ 'cdc_covid_xx_b_dose10_other', 'cdc_covid_xx_b_dose11_other',
+ 'cdc_covid_xx_b_dose12_other', 'cdc_covid_xx_b_dose13_other',
+ 'cdc_covid_xx_b_dose14_other', 'cdc_covid_xx_b_dose15_other',
+ 'cdc_covid_xx_b_dose16_other', 'cdc_covid_xx_b_dose17_other',
+ 'cdc_covid_xx_b_dose3_other', 'cdc_covid_xx_b_dose4_other',
+ 'cdc_covid_xx_b_dose5_other', 'cdc_covid_xx_b_dose6_other',
+ 'cdc_covid_xx_b_dose7_other', 'cdc_covid_xx_b_dose8_other',
+ 'cdc_covid_xx_b_dose9_other', 'cdc_covid_xx_symptom_cope_350_dose10',
+ 'cdc_covid_xx_symptom_cope_350_dose11',
+ 'cdc_covid_xx_symptom_cope_350_dose12',
+ 'cdc_covid_xx_symptom_cope_350_dose13',
+ 'cdc_covid_xx_symptom_cope_350_dose14',
+ 'cdc_covid_xx_symptom_cope_350_dose15',
+ 'cdc_covid_xx_symptom_cope_350_dose16',
+ 'cdc_covid_xx_symptom_cope_350_dose17',
+ 'cdc_covid_xx_symptom_cope_350_dose3',
+ 'cdc_covid_xx_symptom_cope_350_dose4',
+ 'cdc_covid_xx_symptom_cope_350_dose5',
+ 'cdc_covid_xx_symptom_cope_350_dose6',
+ 'cdc_covid_xx_symptom_cope_350_dose7',
+ 'cdc_covid_xx_symptom_cope_350_dose8',
+ 'cdc_covid_xx_symptom_cope_350_dose9', 'cdc_covid_xx_type_dose10_other',
+ 'cdc_covid_xx_type_dose11_other', 'cdc_covid_xx_type_dose12_other',
+ 'cdc_covid_xx_type_dose13_other', 'cdc_covid_xx_type_dose14_other',
+ 'cdc_covid_xx_type_dose15_other', 'cdc_covid_xx_type_dose16_other',
+ 'cdc_covid_xx_type_dose17_other', 'cdc_covid_xx_type_dose3_other',
+ 'cdc_covid_xx_type_dose4_other', 'cdc_covid_xx_type_dose5_other',
+ 'cdc_covid_xx_type_dose6_other', 'cdc_covid_xx_type_dose7_other',
+ 'cdc_covid_xx_type_dose8_other', 'cdc_covid_xx_type_dose9_other',
+ 'dmfs_29_additionaldose_other',
+ 'organtransplant_bloodvesseltransplantdate',
+ 'organtransplant_bonetransplantdate',
+ 'organtransplant_corneatransplantdate',
+ 'organtransplant_hearttransplantdate',
+ 'organtransplant_intestinetransplantdate',
+ 'organtransplant_kidneytransplantdate',
+ 'organtransplant_livertransplantdate',
+ 'organtransplant_lungtransplantdate',
+ 'organtransplant_otherorgantransplantdate',
+ 'organtransplant_othertissuetransplantdate',
+ 'organtransplant_pancreastransplantdate',
+ 'organtransplant_skintransplantdate',
+ 'organtransplant_valvetransplantdate', 'otherorgan_freetextbox',
+ 'othertissue_freetextbox',
+ 'outsidetravel6month_outsidetravel6monthhowlong',
+ 'outsidetravel6month_outsidetravel6monthwheretraveled',
+ 'overallhealth_hysterectomyhistoryage',
+ 'overallhealthovaryremovalhistoryage',
+ 'otherarthritis_daughterfreetextbox', 'otherarthritis_fatherfreetextbox',
+ 'otherarthritis_freetextbox', 'otherarthritis_grandparentfreetextbox',
+ 'otherarthritis_motherfreetextbox', 'otherarthritis_siblingfreetextbox',
+ 'otherarthritis_sonfreetextbox',
+ 'otherbonejointmuscle_daughterfreetextbox',
+ 'otherbonejointmuscle_fatherfreetextbox',
+ 'otherbonejointmuscle_freetextbox',
+ 'otherbonejointmuscle_grandparentfreetextbox',
+ 'otherbonejointmuscle_motherfreetextbox',
+ 'otherbonejointmuscle_siblingfreetextbox',
+ 'otherbonejointmuscle_sonfreetextbox',
+ 'otherbrainnervoussystem_daughterfreetextbox',
+ 'otherbrainnervoussystem_fatherfreetextbox',
+ 'otherbrainnervoussystem_freetextbox',
+ 'otherbrainnervoussystem_grandparentfreetextbox',
+ 'otherbrainnervoussystem_motherfreetextbox',
+ 'otherbrainnervoussystem_siblingfreetextbox',
+ 'otherbrainnervoussystem_sonfreetextbox', 'othercancer_freetextbox',
+ 'otherdiabetes_daughterfreetextbox', 'otherdiabetes_fatherfreetextbox',
+ 'otherdiabetes_freetextbox', 'otherdiabetes_grandparentfreetextbox',
+ 'otherdiabetes_motherfreetextbox', 'otherdiabetes_siblingfreetextbox',
+ 'otherdiabetes_sonfreetextbox', 'otherdiagnosis_daughterfreetextbox',
+ 'otherdiagnosis_fatherfreetextbox', 'otherdiagnosis_freetextbox',
+ 'otherdiagnosis_grandparentfreetextbox',
+ 'otherdiagnosis_motherfreetextbox', 'otherdiagnosis_siblingfreetextbox',
+ 'otherdiagnosis_sonfreetextbox',
+ 'otherdigestivecondition_daughterfreetextbox',
+ 'otherdigestivecondition_fatherfreetextbox',
+ 'otherdigestivecondition_freetextbox',
+ 'otherdigestivecondition_grandparentfreetextbox',
+ 'otherdigestivecondition_motherfreetextbox',
+ 'otherdigestivecondition_siblingfreetextbox',
+ 'otherdigestivecondition_sonfreetextbox',
+ 'otherhearingeye_daughterfreetextbox',
+ 'otherhearingeye_fatherfreetextbox', 'otherhearingeye_freetextbox',
+ 'otherhearingeye_grandparentfreetextbox',
+ 'otherhearingeye_motherfreetextbox',
+ 'otherhearingeye_siblingfreetextbox', 'otherhearingeye_sonfreetextbox',
+ 'otherheartorbloodcondition_daughterfreetextbox',
+ 'otherheartorbloodcondition_fatherfreetextbox',
+ 'otherheartorbloodcondition_freetextbox',
+ 'otherheartorbloodcondition_grandparentfreetextbox',
+ 'otherheartorbloodcondition_motherfreetextbox',
+ 'otherheartorbloodcondition_siblingfreetextbox',
+ 'otherheartorbloodcondition_sonfreetextbox',
+ 'otherhormoneendocrine_daughterfreetextbox',
+ 'otherhormoneendocrine_fatherfreetextbox',
+ 'otherhormoneendocrine_freetextbox',
+ 'otherhormoneendocrine_grandparentfreetextbox',
+ 'otherhormoneendocrine_motherfreetextbox',
+ 'otherhormoneendocrine_siblingfreetextbox',
+ 'otherhormoneendocrine_sonfreetextbox',
+ 'otherinfectiousdisease_freetextbox',
+ 'otherkidneycondition_daughterfreetextbox',
+ 'otherkidneycondition_fatherfreetextbox',
+ 'otherkidneycondition_freetextbox',
+ 'otherkidneycondition_grandparentfreetextbox',
+ 'otherkidneycondition_motherfreetextbox',
+ 'otherkidneycondition_siblingfreetextbox',
+ 'otherkidneycondition_sonfreetextbox',
+ 'othermentalhealthsubstanceuse_daughterfreetextbox',
+ 'othermentalhealthsubstanceuse_fatherfreetextbox',
+ 'othermentalhealthsubstanceuse_freetextbox',
+ 'othermentalhealthsubstanceuse_grandparentfreetextb',
+ 'othermentalhealthsubstanceuse_motherfreetextbox',
+ 'othermentalhealthsubstanceuse_siblingfreetextbox',
+ 'othermentalhealthsubstanceuse_sonfreetextbox',
+ 'otherrespiratory_daughterfreetextbox',
+ 'otherrespiratory_fatherfreetextbox', 'otherrespiratory_freetextbox',
+ 'otherrespiratory_grandparentfreetextbox',
+ 'otherrespiratory_motherfreetextbox',
+ 'otherrespiratory_siblingfreetextbox', 'otherrespiratory_sonfreetextbox',
+ 'otherthyroid_daughterfreetextbox', 'otherthyroid_fatherfreetextbox',
+ 'otherthyroid_freetextbox', 'otherthyroid_grandparentfreetextbox',
+ 'otherthyroid_motherfreetextbox', 'otherthyroid_siblingfreetextbox',
+ 'otherthyroid_sonfreetextbox', 'self_reported_height_cm',
+ 'self_reported_height_ft', 'self_reported_height_in',
+ 'self_reported_weight_kg', 'self_reported_weight_pounds',
+ 'sdoh_eds_follow_up_1_xx', 'urs_8c', 'aian_tribe',
+ 'aiannoneofthesedescribeme_aianfreetext',
+ 'blacknoneofthesedescribeme_blackfreetext',
+ 'employmentworkaddress_addresslineone',
+ 'employmentworkaddress_addresslinetwo', 'employmentworkaddress_city',
+ 'employmentworkaddress_country', 'employmentworkaddress_zipcode',
+ 'hispanicnoneofthesedescribeme_hispanicfreetext',
+ 'livingsituation_howmanypeople',
+ 'livingsituation_livingsituationfreetext',
+ 'livingsituation_peopleunder18',
+ 'menanoneofthesedescribeme_menafreetext',
+ 'nhpinoneofthesedescribeme_nhpifreetext',
+ 'noneofthesedescribeme_asianfreetext', 'otherhealthplan_freetext',
+ 'persononeaddress_persononeaddresscity',
+ 'persononeaddress_persononeaddresszipcode',
+ 'secondarycontactinfo_persononeaddressone',
+ 'secondarycontactinfo_persononeaddresstwo',
+ 'secondarycontactinfo_persononeemail',
+ 'secondarycontactinfo_persononefirstname',
+ 'secondarycontactinfo_persononelastname',
+ 'secondarycontactinfo_persononemiddleinitial',
+ 'secondarycontactinfo_persononetelephone',
+ 'secondarycontactinfo_secondcontactsaddressone',
+ 'secondarycontactinfo_secondcontactsaddresstwo',
+ 'secondarycontactinfo_secondcontactsemail',
+ 'secondarycontactinfo_secondcontactsfirstname',
+ 'secondarycontactinfo_secondcontactslastname',
+ 'secondarycontactinfo_secondcontactsmiddleinitial',
+ 'secondarycontactinfo_secondcontactsnumber',
+ 'secondcontactsaddress_secondcontactcity',
+ 'secondcontactsaddress_secondcontactzipcode',
+ 'sexatbirthnoneofthese_sexatbirthtextbox',
+ 'socialsecurity_socialsecuritynumber',
+ 'somethingelse_sexualitysomethingelsetextbox',
+ 'specifiedgender_specifiedgendertextbox', 'thebasics_countryborntextbox',
+ 'whatraceethnicity_raceethnicitynoneofthese',
+ 'whitenoneofthesedescribeme_whitefreetext', 'timeofday',
+ 'wearconsent_todaysdate']
+
+
# # Table comparison
# The export should generally contain the same tables from month to month.
# Tables found only in the old or the new export are listed below.
@@ -365,20 +583,35 @@
query = tpl.render(new_rdr=new_rdr, project_id=project_id)
execute(client, query)
-# # Check if numeric data in value_as_string
+# # Check numeric data in value_as_string
# Some numeric data is expected in value_as_string. For example, zip codes or other contact specific information.
+#
+# **If the check fails, manually review the results.**
+# False positives are possible. As a first step, run the query in the comments of DC3407: it lists any new text-type survey questions, which should then be added to the list `expected_strings`.
+# +
+# Count, per observation_source_value, the rows whose value_as_string parses as INT64,
+# restricted to value_source_concept_id = 0 and excluding allow-listed / contact-style question codes.
tpl = JINJA_ENV.from_string("""
SELECT
observation_source_value
,COUNT(1) AS n
FROM `{{project_id}}.{{new_rdr}}.observation`
WHERE SAFE_CAST(value_as_string AS INT64) IS NOT NULL
+AND value_source_concept_id = 0
+AND LOWER(observation_source_value) NOT IN UNNEST ({{expected_strings}})
+AND NOT REGEXP_CONTAINS(LOWER(observation_source_value), 'snap|signature|address|email|number|cohortgroup')
GROUP BY 1
ORDER BY 2 DESC
""")
-query = tpl.render(new_rdr=new_rdr, project_id=project_id)
-execute(client, query)
+# NOTE(review): expected_strings is interpolated as a Python list, presumably rendering as its repr
+# (['a', 'b', ...]) so that UNNEST receives an array literal; this relies on JINJA_ENV not escaping quotes - confirm.
+query = tpl.render(new_rdr=new_rdr, project_id=project_id, expected_strings=expected_strings)
+df = execute(client, query)
+
+success_msg = 'All records with a number in value_as_string are expected to be text.'
+failure_msg = 'Some records that have a number value_as_string might not be expected. See description.'
+
+render_message(df,
+ success_msg,
+ failure_msg)
+# -
# # All COPE `questionnaire_response_id`s are in COPE version map
# Any `questionnaire_response_id`s missing from the map will be listed below.