From 8d96581cc19702f8f5e307353eee5ec8205acb3b Mon Sep 17 00:00:00 2001 From: YSK Date: Thu, 8 Aug 2024 18:08:11 -0400 Subject: [PATCH 1/3] removed parent, sorted, added missing program and studies --- src/dug/config.py | 4 +++- src/dug/core/async_search.py | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/dug/config.py b/src/dug/config.py index 93bc678..8b0ec9d 100644 --- a/src/dug/config.py +++ b/src/dug/config.py @@ -143,7 +143,9 @@ def from_env(cls): "redis_port": "REDIS_PORT", "redis_password": "REDIS_PASSWORD", "program_description": "PROGRAM_DESCRIPTION", - "consent_id_path": "CONSENT_ID_PATH" + "consent_id_path": "CONSENT_ID_PATH", + "missing_studies_path": "MISSING_STUDIES_PATH", + "missing_program_path": "MISSING_PROGRAM_PATH" } kwargs = {} diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 4ba27cb..e01fbc6 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -523,7 +523,7 @@ async def search_program(self, program_name=None, offset=0, size=None): # Append the details to the list in the desired format collection_details_list.append(collection_details) - + with open(self._cfg.consent_id_path, 'r') as file: consent_id_mappings = json.load(file) # Add consent_id to the study @@ -540,8 +540,9 @@ async def search_program(self, program_name=None, offset=0, size=None): else: updated_studies.append(study) - return updated_studies + #Adding missing studies + @@ -573,7 +574,16 @@ async def search_program_list(self): # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response unique_data_types = search_results['aggregations']['unique_program_names']['buckets'] data=unique_data_types - print(data) + + #Remove Parent program and add Training program + + data = [item for item in data if item['key'] != 'Parent'] + + with open(self._cfg.missing_program_path, 'r') as file: + missing_programs = json.load(file) + data.append(missing_programs) + + # Sorting the data alphabetically based on 'key' sorted_data = sorted(data, key=lambda x: x['key']) From 5693c0a434e0dd9b77084ffc8612df6ecb8ef9aa Mon Sep 17 00:00:00 2001 From: YSK Date: Thu, 8 Aug 2024 18:10:03 -0400 Subject: [PATCH 2/3] added missing program and studies --- src/dug/config.py | 2 ++ src/dug/core/async_search.py | 23 ++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/dug/config.py b/src/dug/config.py index 8b0ec9d..426ff29 100644 --- a/src/dug/config.py +++ b/src/dug/config.py @@ -30,6 +30,8 @@ class Config: program_sort_list: str = "" program_description: dict=field(default_factory=lambda:{}) consent_id_path: str= "" + missing_studies_path: str="" + missing_program_path: str="" # Preprocessor config that will be passed to annotate.Preprocessor constructor diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index e01fbc6..8123be4 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -524,6 +524,9 @@ async def search_program(self, program_name=None, offset=0, size=None): collection_details_list.append(collection_details) + + + #Adding consent to the studies with open(self._cfg.consent_id_path, 'r') as file: consent_id_mappings = json.load(file) # Add consent_id to the study @@ -539,12 +542,22 @@ async def search_program(self, program_name=None, offset=0, size=None): updated_studies.append(updated_study) else: updated_studies.append(study) + #Adding missing studies - - + with open(self._cfg.missing_studies_path, 'r') as file: + missing_studies = json.load(file) + for program in missing_studies: + print(program_name) + print("\n\n",program) + if program_name.lower() == program['program_name'].lower(): + print("\n\n it matches") + updated_studies.append(program['collections']) + print(program['collections']) + + return updated_studies async def search_program_list(self): @@ -581,11 +594,11 @@ async def search_program_list(self): with open(self._cfg.missing_program_path, 'r') as file: missing_programs = json.load(file) - data.append(missing_programs) - + data.extend(missing_programs) + # Sorting the data alphabetically based on 'key' - sorted_data = sorted(data, key=lambda x: x['key']) + sorted_data = sorted(data, key=lambda x: (x['key'].casefold(), x['key'][1:])) #Add description as another field in exisiting data based on the program name descriptions_json = self._cfg.program_description From 5597f70b73023ab1804011cf21f5e0e58cf8bf12 Mon Sep 17 00:00:00 2001 From: YSK Date: Thu, 8 Aug 2024 18:12:25 -0400 Subject: [PATCH 3/3] replaced sort --- src/dug/core/async_search.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 8123be4..37b527c 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -550,12 +550,9 @@ async def search_program(self, program_name=None, offset=0, size=None): with open(self._cfg.missing_studies_path, 'r') as file: missing_studies = json.load(file) for program in missing_studies: - print(program_name) - print("\n\n",program) if program_name.lower() == program['program_name'].lower(): - print("\n\n it matches") updated_studies.append(program['collections']) - print(program['collections']) + return updated_studies