Skip to content

Commit

Permalink
Merge branch 'develop' into dug-program-names-and-ner
Browse files: browse the repository at this point in the history
  • Loading branch information
YaphetKG authored Jun 28, 2024
2 parents 807aafa + 9dbc791 commit 0bac2c0
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 4 deletions.
6 changes: 6 additions & 0 deletions src/dug/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ class Config:
nboost_host: str = "nboost"
nboost_port: int = 8000

program_sort_list: str = ""
program_name_mappings: dict=field(
default_factory=lambda:{})

# Preprocessor config that will be passed to annotate.Preprocessor constructor
preprocessor: dict = field(
default_factory=lambda: {
Expand Down Expand Up @@ -137,6 +141,8 @@ def from_env(cls):
"redis_host": "REDIS_HOST",
"redis_port": "REDIS_PORT",
"redis_password": "REDIS_PASSWORD",
"program_sort_list": "PROGRAM_SORT_LIST",
"program_name_mappings" : "PROGRAM_NAME_MAPPINGS"
}

kwargs = {}
Expand Down
12 changes: 10 additions & 2 deletions src/dug/core/async_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
from elasticsearch import AsyncElasticsearch
from elasticsearch.helpers import async_scan
import ssl
import ssl,os,json

from dug.config import Config

Expand Down Expand Up @@ -553,7 +553,15 @@ async def search_program_list(self):
)
# The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response
unique_data_types = search_results['aggregations']['unique_program_names']['buckets']

data=unique_data_types
program_keys =self._cfg.program_sort_list.split(',')
#key_mapping = self._cfg.program_name_mappings
#key_mapping = json.loads(key_mapping)
key_index_map = {key: index for index, key in enumerate(program_keys)}
unique_data_types = sorted(data, key=lambda x: key_index_map.get(x['key'], len(program_keys)))
#for item in unique_data_types:
# if item['key'] in key_mapping:
# item['key'] = key_mapping[item['key']]
return unique_data_types


Expand Down
3 changes: 3 additions & 0 deletions src/dug/core/parsers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .bacpac_parser import BACPACParser
from .heal_dp_parser import HEALDPParser
from .ctn_parser import CTNParser
from .radx_parser import RADxParser


logger = logging.getLogger('dug')
Expand Down Expand Up @@ -45,6 +46,8 @@ def define_parsers(parser_dict: Dict[str, Parser]):
parser_dict["recover"] = RECOVERDBGaPParser()
parser_dict["topmeddbgap"] = TopmedDBGaPParser()
parser_dict["curesc"] = CureSC()
parser_dict["radx"] = RADxParser()




Expand Down
36 changes: 36 additions & 0 deletions src/dug/core/parsers/radx_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import logging
from typing import List
from xml.etree import ElementTree as ET

from dug import utils as utils
from ._base import DugElement, FileParser, Indexable, InputFile

logger = logging.getLogger('dug')


class RADxParser(FileParser):
    """Turn a RADx study XML file into a flat list of ``DugElement`` objects.

    The XML root is read for its ``id`` and ``study_name`` attributes, and
    every ``<variable>`` descendant of the root yields one element.
    """

    def __call__(self, input_file: InputFile) -> List[Indexable]:
        """Parse ``input_file`` and return one element per ``<variable>``.

        Args:
            input_file: The XML document to parse; handed straight to
                ``ElementTree.parse``.

        Returns:
            The ``DugElement`` objects built from the file, in document
            order. No concepts are produced by this parser.

        Raises:
            KeyError: If the root lacks ``id`` or ``study_name``, if a
                variable lacks ``id``, or if the root lacks ``module`` while
                at least one variable is present.
            AttributeError: If a variable has no ``<name>`` child.
        """
        xml_parser = ET.XMLParser(encoding='utf-8')
        root = ET.parse(input_file, xml_parser).getroot()

        study_id = root.attrib['id']
        study_name = root.attrib['study_name']

        parsed = []
        for var_node in root.iter('variable'):
            # findtext yields '' both when <description> is absent (via the
            # default) and when it is present but has no text content.
            description = var_node.findtext('description', default='')
            variable_id = var_node.attrib['id']
            variable_name = var_node.find('name').text

            element = DugElement(
                elem_id=f"{variable_id}",
                name=variable_name,
                desc=description,
                # Looked up per variable so that a file without variables
                # never requires the 'module' attribute.
                elem_type=root.attrib['module'],
                collection_id=f"{study_id}",
                collection_name=study_name,
            )

            # The study-level action link is produced by the dbGaP study-link
            # helper, keyed on the element's collection id.
            element.collection_action = utils.get_dbgap_study_link(
                study_id=element.collection_id
            )
            logger.debug(element)
            parsed.append(element)

        # Only elements are returned; this parser creates no concepts.
        return parsed
4 changes: 2 additions & 2 deletions src/dug/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ async def search_program( program_name: Optional[str] = None):
@APP.get('/program_list')
async def get_program_list():
"""
Search for studies by unique_id (ID or name) and/or study_name.
Search for program by program name.
"""
result = await search.search_program_list()
return {
Expand All @@ -159,4 +159,4 @@ async def get_program_list():
"status": "success"
}
if __name__ == '__main__':
uvicorn.run(APP)
uvicorn.run(APP,port=8181)

0 comments on commit 0bac2c0

Please sign in to comment.