From 75f85c4913271495371b92dc1f4d81dcb3d6c292 Mon Sep 17 00:00:00 2001
From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com>
Date: Wed, 27 Mar 2024 14:55:57 -0500
Subject: [PATCH 1/3] [DA-3884] Updates to retention calculator logic per NIH

---
 .../offline/retention_eligible_import.py      | 22 ++++++++++-------
 .../resource/generators/participant.py        | 24 ++++++++++++-------
 tests/helpers/unittest_base.py                |  2 +-
 3 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/rdr_service/offline/retention_eligible_import.py b/rdr_service/offline/retention_eligible_import.py
index 2623c89c98..26e79b7f69 100644
--- a/rdr_service/offline/retention_eligible_import.py
+++ b/rdr_service/offline/retention_eligible_import.py
@@ -7,7 +7,7 @@
 from rdr_service.app_util import nonprod
 from rdr_service.clock import CLOCK
 from rdr_service.code_constants import (
-    COHORT_1_REVIEW_CONSENT_YES_CODE, COPE_MODULE, COPE_NOV_MODULE, COPE_DEC_MODULE, COPE_FEB_MODULE,
+    COPE_MODULE, COPE_NOV_MODULE, COPE_DEC_MODULE, COPE_FEB_MODULE,
     COPE_VACCINE_MINUTE_1_MODULE_CODE, COPE_VACCINE_MINUTE_2_MODULE_CODE, COPE_VACCINE_MINUTE_3_MODULE_CODE,
     COPE_VACCINE_MINUTE_4_MODULE_CODE, PRIMARY_CONSENT_UPDATE_MODULE, PRIMARY_CONSENT_UPDATE_QUESTION_CODE
 )
@@ -253,10 +253,11 @@ def _create_retention_eligible_metrics_obj_from_row(row, upload_date) -> Retenti
     )
 
 
-def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session):
+def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, ehr_validation=True):
     """Fill in the rdr eligibility calculations for comparison"""
 
-    retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session)
+    retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session,
+                                          ehr_validation=ehr_validation)
     if retention_data:
         metrics_data.rdr_retention_eligible = retention_data.is_eligible
         metrics_data.rdr_retention_eligible_time = retention_data.retention_eligible_date
@@ -265,7 +266,7 @@ def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, se
         metrics_data.rdr_is_passively_retained = retention_data.is_passively_retained
 
 
-def build_retention_data(participant_id, session) -> Optional[RetentionEligibility]:
+def build_retention_data(participant_id, session, ehr_validation=True) -> Optional[RetentionEligibility]:
     summary_dao = ParticipantSummaryDao()
     summary: ParticipantSummary = summary_dao.get_with_session(
         session=session,
@@ -284,7 +285,8 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili
         ),
         first_ehr_consent=_get_earliest_intent_for_ehr(
             session=session,
-            participant_id=summary.participantId
+            participant_id=summary.participantId,
+            needs_validation=ehr_validation
         ),
         is_deceased=summary.deceasedStatus == DeceasedStatus.APPROVED,
         is_withdrawn=summary.withdrawalStatus != WithdrawalStatus.NOT_WITHDRAWN,
@@ -322,7 +324,8 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili
             aggregate_function=min  # Get the earliest cohort 1 reconsent response
         ),
         gror_response_timestamp=summary.consentForGenomicsRORAuthored,
-        # Additions for DA-3705 (only NPH module consent info currently available is NPH1)
+        # Per DA-3884:  NPH activity included for active retention calculations is limited to the initial NPH consent
+        # submission sent by PTSC to Participant endpoint (before eligibility or opt-ins are sent to NPH endpoints)
         nph_consent_timestamp=summary.consentForNphModule1Authored,
         etm_consent_timestamp=summary.consentForEtMAuthored,
         wear_consent_timestamp=summary.consentForWearStudyAuthored,
@@ -333,11 +336,11 @@ def build_retention_data(participant_id, session) -> Optional[RetentionEligibili
     return RetentionEligibility(dependencies)
 
 
-def _get_earliest_intent_for_ehr(session, participant_id) -> Optional[Consent]:
+def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=False) -> Optional[Consent]:
     date_range_list = QuestionnaireResponseRepository.get_interest_in_sharing_ehr_ranges(
         participant_id=participant_id,
         session=session,
-        validation_not_required=True
+        validation_not_required=(not needs_validation)
     )
     if not date_range_list:
         return None
@@ -363,7 +366,8 @@ def _aggregate_response_timestamps(session, participant_id, survey_code_list, ag
             # NOTE: This may need more extensive changes when VA/Non-VA reconsent modules go live?
             if response.survey_code == PRIMARY_CONSENT_UPDATE_MODULE:
                 reconsent_answer = response.get_single_answer_for(PRIMARY_CONSENT_UPDATE_QUESTION_CODE)
-                if reconsent_answer and reconsent_answer.value.lower() == COHORT_1_REVIEW_CONSENT_YES_CODE.lower():
+                # Per DA-3884 and information from NIH, any reconsent response qualifies (yes/no)
+                if reconsent_answer:
                     authored_timestamp_list.append(response.authored_datetime)
             elif response.status == QuestionnaireResponseStatus.COMPLETED:
                 # Assume for other modules, we can use the authored date as long as it's a COMPLETED response
diff --git a/rdr_service/resource/generators/participant.py b/rdr_service/resource/generators/participant.py
index 7cb4a8781b..9f9698a430 100644
--- a/rdr_service/resource/generators/participant.py
+++ b/rdr_service/resource/generators/participant.py
@@ -1946,12 +1946,16 @@ def get_consent_pdf_validation_status(p_id, consent_response_rec,
                 from participant_summary
                 where participant_id = {p_id}
              """
-            result = ro_session.execute(ps_sql).first()
-            if result and (result.consent_for_electronic_health_records_authored and
-                           result.consent_for_electronic_health_records_authored == consent_response_rec.authored and
-                           consent_response_rec.authored > pdf_validation_start_date
+            ps_result = ro_session.execute(ps_sql).first()
+            # If there's a m match of the questionnaire response being validated (a "yes" EHR consent) to either the
+            # current EHR suthored timestamp or the first yes authored timestamp: use status from participant_summary
+            if ps_result and (
+                ps_result.consent_for_electronic_health_records_authored and
+                (ps_result.consent_for_electronic_health_records_authored == consent_response_rec.authored
+                    or ps_result.consent_for_electronic_health_records_first_yes_authored == \
+                       consent_response_rec.authored)
             ):
-                return BQModuleStatusEnum(result.consent_for_electronic_health_records)
+                return BQModuleStatusEnum(ps_result.consent_for_electronic_health_records)
 
             # Look for consent validation results matching this EHR consent response
             qr_ids = []
@@ -1980,11 +1984,13 @@ def get_consent_pdf_validation_status(p_id, consent_response_rec,
             """
             result = ro_session.execute(validation_status_sql, {'qr_ids': qr_ids}).fetchone()
             if result:
-                if (result.created > pdf_validation_start_date and
-                       result.sync_status in (int(ConsentSyncStatus.NEEDS_CORRECTING),
-                                              int(ConsentSyncStatus.OBSOLETE))):
+                if (consent_response_rec.authored >= pdf_validation_start_date
+                       and result.sync_status in (int(ConsentSyncStatus.NEEDS_CORRECTING),
+                                                  int(ConsentSyncStatus.OBSOLETE))):
                     status = BQModuleStatusEnum.SUBMITTED_INVALID
-            elif consent_response_rec.authored and consent_response_rec.authored > pdf_validation_start_date:
+                    # Else: authored consents that precede the validation start date will fall through to return
+                    # the default SUBMITTED status
+            elif consent_response_rec.authored and consent_response_rec.authored >= pdf_validation_start_date:
                 # No consent_file (consent_response) record yet to match to the questionnaire_response_id
                 status = BQModuleStatusEnum.SUBMITTED_NOT_VALIDATED
         else:
diff --git a/tests/helpers/unittest_base.py b/tests/helpers/unittest_base.py
index a3177235b8..c9d3fa1baa 100644
--- a/tests/helpers/unittest_base.py
+++ b/tests/helpers/unittest_base.py
@@ -500,7 +500,7 @@ def setUp(self, with_data=True, with_consent_codes=False) -> None:
         logger.addHandler(stream_handler)
         self.addCleanup(logger.removeHandler, stream_handler)
 
-        # Change this to logging.ERROR when you want to see API server errors.
+        # Change this from logging.CRITICAL to logging.ERROR when you want to see API server errors.
         logger.setLevel(logging.CRITICAL)
 
         if BaseTestCase._first_setup:

From d4044cfd64788ef681a3837537986faee870f8a1 Mon Sep 17 00:00:00 2001
From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com>
Date: Wed, 27 Mar 2024 18:08:32 -0500
Subject: [PATCH 2/3] Updated requirements to resolve unittest issues

---
 requirements.txt      | 2 +-
 requirements_base.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 6a620f7645..62d8d09daf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 -r requirements_base.txt
 
-mysqlclient==2.2.0
+mysqlclient==2.2.4
diff --git a/requirements_base.txt b/requirements_base.txt
index 675ad3d47c..bc5183d831 100644
--- a/requirements_base.txt
+++ b/requirements_base.txt
@@ -12,7 +12,7 @@ chardet==3.0.4            # via requests
 click==8.1.3              # via flask, pip-tools
 configargparse==1.2.3     # via locust
 coverage==7.2.5             # via -r requirements.in
-cryptography==42.0.4       # via oauthlib, pyopenssl, requests
+cryptography==42.0.5       # via oauthlib, pyopenssl, requests
 defusedxml==0.6.0         # via jira
 dictalchemy3==1.0.0       # via -r requirements.in
 dnspython==2.6.1         # via -r requirements.in

From ba8018ca5f3dd1867fc399b0930d50d41d45f43c Mon Sep 17 00:00:00 2001
From: Peggy Bertsch <65624611+pegbertsch@users.noreply.github.com>
Date: Tue, 2 Apr 2024 14:22:18 -0500
Subject: [PATCH 3/3] Update logic for overriding EHR validation check

---
 .../offline/retention_eligible_import.py      | 12 +++++-----
 .../questionnaire_response_repository.py      |  8 +++----
 .../tools/tool_libs/retention_metrics.py      | 22 ++++++++++++++-----
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/rdr_service/offline/retention_eligible_import.py b/rdr_service/offline/retention_eligible_import.py
index c22ee6178a..1425332610 100644
--- a/rdr_service/offline/retention_eligible_import.py
+++ b/rdr_service/offline/retention_eligible_import.py
@@ -292,11 +292,11 @@ def _create_retention_eligible_metrics_obj_from_row(row, upload_date) -> Retenti
     )
 
 
-def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, ehr_validation=True):
+def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, session, check_consent_validation=True):
     """Fill in the rdr eligibility calculations for comparison"""
 
     retention_data = build_retention_data(participant_id=metrics_data.participantId, session=session,
-                                          ehr_validation=ehr_validation)
+                                          check_consent_validation=check_consent_validation)
     if retention_data:
         metrics_data.rdr_retention_eligible = retention_data.is_eligible
         metrics_data.rdr_retention_eligible_time = retention_data.retention_eligible_date
@@ -305,7 +305,7 @@ def _supplement_with_rdr_calculations(metrics_data: RetentionEligibleMetrics, se
         metrics_data.rdr_is_passively_retained = retention_data.is_passively_retained
 
 
-def build_retention_data(participant_id, session, ehr_validation=True) -> Optional[RetentionEligibility]:
+def build_retention_data(participant_id, session, check_consent_validation=True) -> Optional[RetentionEligibility]:
     summary_dao = ParticipantSummaryDao()
     summary: ParticipantSummary = summary_dao.get_with_session(
         session=session,
@@ -325,7 +325,7 @@ def build_retention_data(participant_id, session, ehr_validation=True) -> Option
         first_ehr_consent=_get_earliest_intent_for_ehr(
             session=session,
             participant_id=summary.participantId,
-            needs_validation=ehr_validation
+            needs_validation=check_consent_validation
         ),
         is_deceased=summary.deceasedStatus == DeceasedStatus.APPROVED,
         is_withdrawn=summary.withdrawalStatus != WithdrawalStatus.NOT_WITHDRAWN,
@@ -375,11 +375,11 @@ def build_retention_data(participant_id, session, ehr_validation=True) -> Option
     return RetentionEligibility(dependencies)
 
 
-def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=False) -> Optional[Consent]:
+def _get_earliest_intent_for_ehr(session, participant_id, needs_validation=True) -> Optional[Consent]:
     date_range_list = QuestionnaireResponseRepository.get_interest_in_sharing_ehr_ranges(
         participant_id=participant_id,
         session=session,
-        validation_not_required=(not needs_validation)
+        validation_required=needs_validation
     )
     if not date_range_list:
         return None
diff --git a/rdr_service/repository/questionnaire_response_repository.py b/rdr_service/repository/questionnaire_response_repository.py
index 12b817a2dc..dc1bc9a229 100644
--- a/rdr_service/repository/questionnaire_response_repository.py
+++ b/rdr_service/repository/questionnaire_response_repository.py
@@ -162,13 +162,12 @@ def get_validated_ehr_consent_ids(cls, participant_id, session: Session):
 
     @classmethod
     def get_interest_in_sharing_ehr_ranges(cls, participant_id, session: Session, default_authored_datetime=None,
-                                           validation_not_required=False):
+                                           validation_required=True):
         """
         :param participant_id:  Participant id (integer)
         :param session:  A session object for querying data
         :param default_authored_datetime: An authored timestamp to match to an EHR consent response, if provided
-        :param validation_not_required: A flag to disable enforcement of successful PDF validation.   When this
-                                        function is called during calculation of retention eligibility, is set to True
+        :param validation_required: A flag which can be overridden to disable enforcement of successful PDF validation.
 
         """
         # Load all EHR and DV_EHR responses
@@ -190,9 +189,8 @@ def get_interest_in_sharing_ehr_ranges(cls, participant_id, session: Session, de
 
         # Find all ranges where interest in sharing EHR was expressed (DV_EHR) or consent to share was provided
         ehr_interest_date_ranges = []
-
         skip_validation_check = (config.getSettingJson('ENROLLMENT_STATUS_SKIP_VALIDATION', False)
-                                 or validation_not_required)
+                                 or not validation_required)
         if sharing_response_list:
 
             current_date_range = None
diff --git a/rdr_service/tools/tool_libs/retention_metrics.py b/rdr_service/tools/tool_libs/retention_metrics.py
index b3b6292f05..fb1a1611f2 100644
--- a/rdr_service/tools/tool_libs/retention_metrics.py
+++ b/rdr_service/tools/tool_libs/retention_metrics.py
@@ -128,8 +128,9 @@ def check_for_rdr_mismatches(cls, rem_rec: RetentionEligibleMetrics):
         return mismatches
 
     @classmethod
-    def recalculate_rdr_retention(cls, session, rem_rec: RetentionEligibleMetrics):
-        _supplement_with_rdr_calculations(metrics_data=rem_rec, session=session)
+    def recalculate_rdr_retention(cls, session, rem_rec: RetentionEligibleMetrics, consent_validation):
+        _supplement_with_rdr_calculations(metrics_data=rem_rec, session=session,
+                                          check_consent_validation=consent_validation)
         return cls.check_for_rdr_mismatches(rem_rec)
 
     def run(self):
@@ -146,7 +147,8 @@ def run(self):
                 for pid in pid_list:
                     _logger.info(f'Recalculating P{pid}...')
                     # Error messages will be emitted if there are mismatches after recalculation
-                    self.recalculate_rdr_retention(session, self.get_retention_db_record(session, pid))
+                    self.recalculate_rdr_retention(session, self.get_retention_db_record(session, pid),
+                                                   not self.args.no_consent_validation)
                     count += 1
 
                 session.commit()
@@ -248,7 +250,8 @@ def run(self):
                 elif len(mismatches):
                     # Try recalculating the RDR values to resolve deltas
                     recalculated.append(pid)
-                    mismatches = RetentionRecalcClass.recalculate_rdr_retention(session, db_obj)
+                    mismatches = RetentionRecalcClass.recalculate_rdr_retention(session, db_obj,
+                                                                                not self.args.no_consent_validation)
                     if 'retentionEligible' in mismatches:
                         retention_eligible_mismatches.append(pid)
                     if 'activelyRetained' in mismatches:
@@ -333,7 +336,8 @@ def run():
                                       help='action to perform, such as qc')
 
     load_parser = subparser.add_parser("load")
-    load_parser.add_argument("--file-upload-time", help="bucket file upload time (as string) of metrics file", type=str)
+    load_parser.add_argument("--file-upload-time",
+                             help="bucket file upload time (as string) of metrics file", type=str)
 
     qc_parser = subparser.add_parser("qc")
     qc_parser.add_argument("--bucket-file", help="bucket_file_path", type=str)
@@ -341,7 +345,13 @@ def run():
 
     recalc_parser = subparser.add_parser("recalc")
     recalc_parser.add_argument('--from-file', help='file of ids to recalculate', default='', type=str)  # noqa
-    recalc_parser.add_argument("--id", help="comma-separated list of ids to recalculate", type=str, default=None)
+    recalc_parser.add_argument("--id", help="comma-separated list of ids to recalculate",
+                               type=str, default=None)
+    recalc_parser.add_argument("--no-consent-validation",
+                               help="Do not enforce EHR consent validation when calculating metrics",
+                               action='store_true',
+                               )
+
     args = parser.parse_args()
 
     with GCPProcessContext(tool_cmd, args.project, args.account, args.service_account) as gcp_env: