From 75f85c4913271495371b92dc1f4d81dcb3d6c292 Mon Sep 17 00:00:00 2001 From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com> Date: Wed, 27 Mar 2024 14:55:57 -0500 Subject: [PATCH 1/3] [DA-3884] Updates to retention calculator logic per NIH --- .../offline/retention_eligible_import.py | 22 ++++++++++------- .../resource/generators/participant.py | 24 ++++++++++++------- tests/helpers/unittest_base.py | 2 +- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/rdr_service/offline/retention_eligible_import.py b/rdr_service/offline/retention_eligible_import.py index 2623c89c98..26e79b7f69 100644 --- a/rdr_service/offline/retention_eligible_import.py +++ b/rdr_service/offline/retention_eligible_import.py @@ -7,7 +7,7 @@ from rdr_service.app_util import nonprod from rdr_service.clock import CLOCK from rdr_service.code_constants import ( - COHORT_1_REVIEW_CONSENT_YES_CODE, COPE_MODULE, COPE_NOV_MODULE, COPE_DEC_MODULE, COPE_FEB_MODULE, + COPE_MODULE, COPE_NOV_MODULE, COPE_DEC_MODULE, COPE_FEB_MODULE, COPE_VACCINE_MINUTE_1_MODULE_CODE, COPE_VACCINE_MINUTE_2_MODULE_CODE, COPE_VACCINE_MINUTE_3_MODULE_CODE, COPE_VACCINE_MINUTE_4_MODULE_CODE, PRIMARY_CONSENT_UPDATE_MODULE, PRIMARY_CONSENT_UPDATE_QUESTION_CODE ) @@ -253,10 +253,11 @@ def _create_retention_eligible_metrics_obj_from_row(row, upload_date) -> Retenti ) -def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session): +def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, ehr_validation=True): """Fill in the rdr eligibility calculations for comparison""" - retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session) + retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session, + ehr_validation=ehr_validation) if retention_data: metrics_data.rdr_retention_eligible = retention_data.is_eligible metrics_data.rdr_retention_eligible_time = retention_data.retention_eligible_date @@ -265,7 +266,7 @@ def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, se metrics_data.rdr_is_passively_retained = retention_data.is_passively_retained -def build_retention_data(participant_id, session) -> Optional[RetentionEligibility]: +def build_retention_data(participant_id, session, ehr_validation=True) -> Optional[RetentionEligibility]: summary_dao = ParticipantSummaryDao() summary: ParticipantSummary = summary_dao.get_with_session( session=session, @@ -284,7 +285,8 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili ), first_ehr_consent=_get_earliest_intent_for_ehr( session=session, - participant_id=summary.participantId + participant_id=summary.participantId, + needs_validation=ehr_validation ), is_deceased=summary.deceasedStatus == DeceasedStatus.APPROVED, is_withdrawn=summary.withdrawalStatus != WithdrawalStatus.NOT_WITHDRAWN, @@ -322,7 +324,8 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili aggregate_function=min # Get the earliest cohort 1 reconsent response ), gror_response_timestamp=summary.consentForGenomicsRORAuthored, - # Additions for DA-3705 (only NPH module consent info currently available is NPH1) + # Per DA-3884: NPH activity included for active retention calculations is limited to the initial NPH consent + # submission sent by PTSC to Participant endpoint (before eligibility or opt-ins are sent to NPH endpoints) nph_consent_timestamp=summary.consentForNphModule1Authored, etm_consent_timestamp=summary.consentForEtMAuthored, wear_consent_timestamp=summary.consentForWearStudyAuthored, @@ -333,11 +336,11 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili return RetentionEligibility(dependencies) -def _get_earliest_intent_for_ehr(session, participant_id) -> Optional[Consent]: +def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=False) -> Optional[Consent]: date_range_list = QuestionnaireResponseRepository.get_interest_in_sharing_ehr_ranges( participant_id=participant_id, session=session, - validation_not_required=True + validation_not_required=(not needs_validation) ) if not date_range_list: return None @@ -363,7 +366,8 @@ def _aggregate_response_timestamps(session, participant_id, survey_code_list, ag # NOTE: This may need more extensive changes when VA/Non-VA reconsent modules go live? if response.survey_code == PRIMARY_CONSENT_UPDATE_MODULE: reconsent_answer = response.get_single_answer_for(PRIMARY_CONSENT_UPDATE_QUESTION_CODE) - if reconsent_answer and reconsent_answer.value.lower() == COHORT_1_REVIEW_CONSENT_YES_CODE.lower(): + # Per DA-3884 and information from NIH, any reconsent response qualifies (yes/no) + if reconsent_answer: authored_timestamp_list.append(response.authored_datetime) elif response.status == QuestionnaireResponseStatus.COMPLETED: # Assume for other modules, we can use the authored date as long as it's a COMPLETED response diff --git a/rdr_service/resource/generators/participant.py b/rdr_service/resource/generators/participant.py index 7cb4a8781b..9f9698a430 100644 --- a/rdr_service/resource/generators/participant.py +++ b/rdr_service/resource/generators/participant.py @@ -1946,12 +1946,16 @@ def get_consent_pdf_validation_status(p_id, consent_response_rec, from participant_summary where participant_id = {p_id} """ - result = ro_session.execute(ps_sql).first() - if result and (result.consent_for_electronic_health_records_authored and - result.consent_for_electronic_health_records_authored == consent_response_rec.authored and - consent_response_rec.authored > pdf_validation_start_date + ps_result = ro_session.execute(ps_sql).first() + # If there's a m match of the questionnaire response being validated (a "yes" EHR consent) to either the + # current EHR suthored timestamp or the first yes authored timestamp: use status from participant_summary + if ps_result and ( + ps_result.consent_for_electronic_health_records_authored and + (ps_result.consent_for_electronic_health_records_authored == consent_response_rec.authored + or ps_result.consent_for_electronic_health_records_first_yes_authored == \ + consent_response_rec.authored) ): - return BQModuleStatusEnum(result.consent_for_electronic_health_records) + return BQModuleStatusEnum(ps_result.consent_for_electronic_health_records) # Look for consent validation results matching this EHR consent response qr_ids = [] @@ -1980,11 +1984,13 @@ def get_consent_pdf_validation_status(p_id, consent_response_rec, """ result = ro_session.execute(validation_status_sql, {'qr_ids': qr_ids}).fetchone() if result: - if (result.created > pdf_validation_start_date and - result.sync_status in (int(ConsentSyncStatus.NEEDS_CORRECTING), - int(ConsentSyncStatus.OBSOLETE))): + if (consent_response_rec.authored >= pdf_validation_start_date + and result.sync_status in (int(ConsentSyncStatus.NEEDS_CORRECTING), + int(ConsentSyncStatus.OBSOLETE))): status = BQModuleStatusEnum.SUBMITTED_INVALID - elif consent_response_rec.authored and consent_response_rec.authored > pdf_validation_start_date: + # Else: authored consents that precede the validation start date will fall through to return + # the default SUBMITTED status + elif consent_response_rec.authored and consent_response_rec.authored >= pdf_validation_start_date: # No consent_file (consent_response) record yet to match to the questionnaire_response_id status = BQModuleStatusEnum.SUBMITTED_NOT_VALIDATED else: diff --git a/tests/helpers/unittest_base.py b/tests/helpers/unittest_base.py index a3177235b8..c9d3fa1baa 100644 --- a/tests/helpers/unittest_base.py +++ b/tests/helpers/unittest_base.py @@ -500,7 +500,7 @@ def setUp(self, with_data=True, with_consent_codes=False) -> None: logger.addHandler(stream_handler) self.addCleanup(logger.removeHandler, stream_handler) - # Change this to logging.ERROR when you want to see API server errors. + # Change this from logging.CRITICAL to logging.ERROR when you want to see API server errors. logger.setLevel(logging.CRITICAL) if BaseTestCase._first_setup: From d4044cfd64788ef681a3837537986faee870f8a1 Mon Sep 17 00:00:00 2001 From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com> Date: Wed, 27 Mar 2024 18:08:32 -0500 Subject: [PATCH 2/3] Updated requirements to resolve unittest issues --- requirements.txt | 2 +- requirements_base.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6a620f7645..62d8d09daf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -r requirements_base.txt -mysqlclient==2.2.0 +mysqlclient==2.2.4 diff --git a/requirements_base.txt b/requirements_base.txt index 675ad3d47c..bc5183d831 100644 --- a/requirements_base.txt +++ b/requirements_base.txt @@ -12,7 +12,7 @@ chardet==3.0.4 # via requests click==8.1.3 # via flask, pip-tools configargparse==1.2.3 # via locust coverage==7.2.5 # via -r requirements.in -cryptography==42.0.4 # via oauthlib, pyopenssl, requests +cryptography==42.0.5 # via oauthlib, pyopenssl, requests defusedxml==0.6.0 # via jira dictalchemy3==1.0.0 # via -r requirements.in dnspython==2.6.1 # via -r requirements.in From ba8018ca5f3dd1867fc399b0930d50d41d45f43c Mon Sep 17 00:00:00 2001 From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com> Date: Tue, 2 Apr 2024 14:22:18 -0500 Subject: [PATCH 3/3] Update logic for overriding EHR validation check --- .../offline/retention_eligible_import.py | 12 +++++----- .../questionnaire_response_repository.py | 8 +++---- .../tools/tool_libs/retention_metrics.py | 22 ++++++++++++++----- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/rdr_service/offline/retention_eligible_import.py b/rdr_service/offline/retention_eligible_import.py index c22ee6178a..1425332610 100644 --- a/rdr_service/offline/retention_eligible_import.py +++ b/rdr_service/offline/retention_eligible_import.py @@ -292,11 +292,11 @@ def _create_retention_eligible_metrics_obj_from_row(row, upload_date) -> Retenti ) -def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, ehr_validation=True): +def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, check_consent_validation=True): """Fill in the rdr eligibility calculations for comparison""" retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session, - ehr_validation=ehr_validation) + check_consent_validation=check_consent_validation) if retention_data: metrics_data.rdr_retention_eligible = retention_data.is_eligible metrics_data.rdr_retention_eligible_time = retention_data.retention_eligible_date @@ -305,7 +305,7 @@ def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, se metrics_data.rdr_is_passively_retained = retention_data.is_passively_retained -def build_retention_data(participant_id, session, ehr_validation=True) -> Optional[RetentionEligibility]: +def build_retention_data(participant_id, session, check_consent_validation=True) -> Optional[RetentionEligibility]: summary_dao = ParticipantSummaryDao() summary: ParticipantSummary = summary_dao.get_with_session( session=session, @@ -325,7 +325,7 @@ def build_retention_data(participant_id, session, ehr_validation=True) -> Option first_ehr_consent=_get_earliest_intent_for_ehr( session=session, participant_id=summary.participantId, - needs_validation=ehr_validation + needs_validation=check_consent_validation ), is_deceased=summary.deceasedStatus == DeceasedStatus.APPROVED, is_withdrawn=summary.withdrawalStatus != WithdrawalStatus.NOT_WITHDRAWN, @@ -375,11 +375,11 @@ def build_retention_data(participant_id, session, ehr_validation=True) -> Option return RetentionEligibility(dependencies) -def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=False) -> Optional[Consent]: +def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=True) -> Optional[Consent]: date_range_list = QuestionnaireResponseRepository.get_interest_in_sharing_ehr_ranges( participant_id=participant_id, session=session, - validation_not_required=(not needs_validation) + validation_required=needs_validation ) if not date_range_list: return None diff --git a/rdr_service/repository/questionnaire_response_repository.py b/rdr_service/repository/questionnaire_response_repository.py index 12b817a2dc..dc1bc9a229 100644 --- a/rdr_service/repository/questionnaire_response_repository.py +++ b/rdr_service/repository/questionnaire_response_repository.py @@ -162,13 +162,12 @@ def get_validated_ehr_consent_ids(cls, participant_id, session: Session): @classmethod def get_interest_in_sharing_ehr_ranges(cls, participant_id, session: Session, default_authored_datetime=None, - validation_not_required=False): + validation_required=True): """ :param participant_id: Participant id (integer) :param session: A session object for querying data :param default_authored_datetime: An authored timestamp to match to an EHR consent response, if provided - :param validation_not_required: A flag to disable enforcement of successful PDF validation. When this - function is called during calculation of retention eligibility, is set to True + :param validation_required: A flag which can be overridden to disable enforcement of successful PDF validation. """ # Load all EHR and DV_EHR responses @@ -190,9 +189,8 @@ def get_interest_in_sharing_ehr_ranges(cls, participant_id, session: Session, de # Find all ranges where interest in sharing EHR was expressed (DV_EHR) or consent to share was provided ehr_interest_date_ranges = [] - skip_validation_check = (config.getSettingJson('ENROLLMENT_STATUS_SKIP_VALIDATION', False) - or validation_not_required) + or not validation_required) if sharing_response_list: current_date_range = None diff --git a/rdr_service/tools/tool_libs/retention_metrics.py b/rdr_service/tools/tool_libs/retention_metrics.py index b3b6292f05..fb1a1611f2 100644 --- a/rdr_service/tools/tool_libs/retention_metrics.py +++ b/rdr_service/tools/tool_libs/retention_metrics.py @@ -128,8 +128,9 @@ def check_for_rdr_mismatches(cls, rem_rec: RetentionEligibleMetrics): return mismatches @classmethod - def recalculate_rdr_retention(cls, session, rem_rec: RetentionEligibleMetrics): - _supplement_with_rdr_calculations(metrics_data=rem_rec, session=session) + def recalculate_rdr_retention(cls, session, rem_rec: RetentionEligibleMetrics, consent_validation): + _supplement_with_rdr_calculations(metrics_data=rem_rec, session=session, + check_consent_validation=consent_validation) return cls.check_for_rdr_mismatches(rem_rec) def run(self): @@ -146,7 +147,8 @@ def run(self): for pid in pid_list: _logger.info(f'Recalculating P{pid}...') # Error messages will be emitted if there are mismatches after recalculation - self.recalculate_rdr_retention(session, self.get_retention_db_record(session, pid)) + self.recalculate_rdr_retention(session, self.get_retention_db_record(session, pid), + not self.args.no_consent_validation) count += 1 session.commit() @@ -248,7 +250,8 @@ def run(self): elif len(mismatches): # Try recalculating the RDR values to resolve deltas recalculated.append(pid) - mismatches = RetentionRecalcClass.recalculate_rdr_retention(session, db_obj) + mismatches = RetentionRecalcClass.recalculate_rdr_retention(session, db_obj, + not self.args.no_consent_validation) if 'retentionEligible' in mismatches: retention_eligible_mismatches.append(pid) if 'activelyRetained' in mismatches: @@ -333,7 +336,8 @@ def run(): help='action to perform, such as qc') load_parser = subparser.add_parser("load") - load_parser.add_argument("--file-upload-time", help="bucket file upload time (as string) of metrics file", type=str) + load_parser.add_argument("--file-upload-time", + help="bucket file upload time (as string) of metrics file", type=str) qc_parser = subparser.add_parser("qc") qc_parser.add_argument("--bucket-file", help="bucket_file_path", type=str) @@ -341,7 +345,13 @@ def run(): recalc_parser = subparser.add_parser("recalc") recalc_parser.add_argument('--from-file', help='file of ids to recalculate', default='', type=str) # noqa - recalc_parser.add_argument("--id", help="comma-separated list of ids to recalculate", type=str, default=None) + recalc_parser.add_argument("--id", help="comma-separated list of ids to recalculate", + type=str, default=None) + recalc_parser.add_argument("--no-consent-validation", + help="Do not enforce EHR consent validation when calculating metrics", + action='store_true', + ) + args = parser.parse_args() with GCPProcessContext(tool_cmd, args.project, args.account, args.service_account) as gcp_env: