Skip to content

Commit

Permalink
Merge pull request #337 from helxplatform/patch-nrslv-resp
Browse files Browse the repository at this point in the history
Patch nrslv resp
  • Loading branch information
YaphetKG authored Jan 25, 2024
2 parents 1c42441 + 275abcb commit 073aefa
Show file tree
Hide file tree
Showing 29 changed files with 265 additions and 805 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-push-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
- .dockerignore
- .githooks
tags-ignore:
- 'v[0-9]+.[0-9]+.*'
- '*'
jobs:
build-push-release:
runs-on: ubuntu-latest
Expand Down
83 changes: 44 additions & 39 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,45 +66,6 @@ jobs:
# flake8 --ignore=E,W --exit-zero .
continue-on-error: true

# ############################## build-vuln-test ##############################
# build-vuln-test:
# # needs: flake8-linter
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3

# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# with:
# driver-opts: |
# network=host

# - name: Login to DockerHub
# uses: docker/login-action@v3
# with:
# username: ${{ secrets.DOCKERHUB_USERNAME }}
# password: ${{ secrets.DOCKERHUB_TOKEN }}
# logout: true

# # Notes on Cache:
# # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
# - name: Build Container
# uses: docker/build-push-action@v5
# with:
# context: .
# push: false
# load: true
# tag: ${{ github.repository }}:vuln-test
# cache-from: type=registry,ref=${{ github.repository }}:buildcache
# cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
# ####### Run for Fidelity ######
# - name: Run Trivy vulnerability scanner
# uses: aquasecurity/trivy-action@master
# with:
# image-ref: '${{ github.repository }}:vuln-test'
# severity: 'CRITICAL,HIGH'
# exit-code: '1'

################################### PYTEST ###################################
pytest:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -145,3 +106,47 @@ jobs:
- name: Test with Bandit
run: |
bandit -r src -n3 -lll
############################## test-image-build ##############################
test-image-build:
  # Builds the container image and pushes it to the registry under a
  # `test_<ref>` tag, reusing the shared `buildcache` registry cache.
  runs-on: ubuntu-latest
  # if: ${{ github.actor == 'dependabot[bot]' }}
  steps:
    - uses: actions/checkout@v3

    # Step outputs are written to $GITHUB_OUTPUT (the `set-output` command is deprecated):
    # https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/
    - name: Set short git commit SHA
      id: vars
      env:
        # Context values are passed through the environment rather than
        # interpolated into the script text, so they are never parsed as shell.
        COMMIT_SHA: ${{ github.sha }}
      run: |
        echo "short_sha=$(git rev-parse --short "$COMMIT_SHA")" >> "$GITHUB_OUTPUT"

    - name: Confirm git commit SHA output
      env:
        SHORT_SHA: ${{ steps.vars.outputs.short_sha }}
      run: echo "$SHORT_SHA"

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Login to DockerHub
      uses: docker/login-action@v3
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}
        logout: true

    - name: Parse Github Reference Name
      id: branch
      env:
        # github.ref_name is user-controlled (branch names may contain shell
        # metacharacters); expanding it inline in `run:` allows script injection.
        REF: ${{ github.ref_name }}
      run: |
        # Drop the last `/`-separated segment, e.g. `337/merge` -> `337`.
        # A ref without `/` is left unchanged.
        # NOTE(review): a ref with several `/` still keeps one (`a/b/c` -> `a/b`),
        # which is not a valid Docker tag - confirm whether such refs can occur.
        echo "GHR=${REF%/*}" >> "$GITHUB_OUTPUT"

    # Notes on Cache:
    # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
    - name: Build Container
      uses: docker/build-push-action@v5
      with:
        context: .
        push: true
        tags: |
          ${{ github.repository }}:test_${{ steps.branch.outputs.GHR }}
        cache-from: type=registry,ref=${{ github.repository }}:buildcache
        cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
10 changes: 7 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
# A container for the core semantic-search capability.
#
######################################################
FROM python:3.12.0-alpine3.18
FROM python:3.12.1-alpine3.19


# Install required packages
RUN apk update && \
apk add g++ make
apk add g++ make

# Upgrade openssl
RUN apk add openssl=3.1.4-r4

RUN pip install --upgrade pip
# Create a non-root user.
Expand All @@ -31,4 +35,4 @@ RUN make install
RUN make install.dug

# Run it
ENTRYPOINT dug
ENTRYPOINT dug
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ services:
##
#################################################################################
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
networks:
- dug-network
environment:
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@ elasticsearch[async]==8.5.2
gunicorn
itsdangerous
Jinja2
jsonpickle
jsonschema
MarkupSafe
ormar
mistune
pluggy
pyrsistent
pytest
pytest-asyncio
pytz
PyYAML
requests
Expand All @@ -26,4 +28,4 @@ click
httpx
linkml-runtime==1.6.0
bmt==1.1.0
urllib3
urllib3
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ classifiers =
package_dir =
= src
packages = find:
python_requires = >=3.12
python_requires = >=3.10
include_package_data = true
install_requires =
elasticsearch==8.5.2
Expand Down
2 changes: 1 addition & 1 deletion src/dug/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def get_argparser():
'-a', '--annotator',
help='Annotator used to annotate identifiers in crawl file',
dest="annotator_type",
default="annotator-monarch"
default="monarch"
)

crawl_parser.add_argument(
Expand Down
137 changes: 83 additions & 54 deletions src/dug/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
@dataclass
class Config:
"""
TODO: Populate description
TODO: Populate description
"""

elastic_password: str = "changeme"
redis_password: str = "changeme"

Expand All @@ -27,74 +28,102 @@ class Config:
nboost_port: int = 8000

# Preprocessor config that will be passed to annotate.Preprocessor constructor
preprocessor: dict = field(default_factory=lambda: {
"debreviator": {
"BMI": "body mass index"
},
"stopwords": ["the"]
})

preprocessor: dict = field(
default_factory=lambda: {
"debreviator": {"BMI": "body mass index"},
"stopwords": ["the"],
}
)
annotator_type: str = "monarch"
# Annotator config that will be passed to annotate.Annotator constructor
annotator: dict = field(default_factory=lambda: {
"url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
})
annotator_args: dict = field(
default_factory=lambda: {
"monarch": {
"url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
},
"sapbert": {
"classification_url": "https://med-nemo.apps.renci.org/annotate/",
"annotator_url": "https://babel-sapbert.apps.renci.org/annotate/",
},
}
)

# Normalizer config that will be passed to annotate.Normalizer constructor
normalizer: dict = field(default_factory=lambda: {
"url": "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie="
})
normalizer: dict = field(
default_factory=lambda: {
"url": "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie="
}
)

# Synonym service config that will be passed to annotate.SynonymHelper constructor
synonym_service: dict = field(default_factory=lambda: {
"url": "https://name-resolution-sri.renci.org/reverse_lookup"
})
synonym_service: dict = field(
default_factory=lambda: {
"url": "https://name-resolution-sri.renci.org/reverse_lookup"
}
)

# Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor
ontology_helper: dict = field(default_factory=lambda: {
"url": "https://api.monarchinitiative.org/api/bioentity/"
})
ontology_helper: dict = field(
default_factory=lambda: {
"url": "https://api.monarchinitiative.org/api/bioentity/"
}
)

# Redlist of identifiers not to expand via TranQL
tranql_exclude_identifiers: list = field(default_factory=lambda: ["CHEBI:17336"])

tranql_queries: dict = field(default_factory=lambda: {
"disease": ["disease", "phenotypic_feature"],
"pheno": ["phenotypic_feature", "disease"],
"anat": ["disease", "anatomical_entity"],
"chem_to_disease": ["chemical_entity", "disease"],
"small_molecule_to_disease": ["small_molecule", "disease"],
"chemical_mixture_to_disease": ["chemical_mixture", "disease"],
"phen_to_anat": ["phenotypic_feature", "anatomical_entity"],
})

node_to_element_queries: dict = field(default_factory=lambda: {
# Dug element type to cast the query kg nodes to
"cde": {
# Parse nodes matching criteria in kg
"node_type": "biolink:Publication",
"curie_prefix": "HEALCDE",
# list of attributes that are lists to be cast to strings
"list_field_choose_first": [
"files"
],
"attribute_mapping": {
# "DugElement Attribute" : "KG Node attribute"
"name": "name",
"desc": "summary",
"collection_name": "cde_category",
"collection_id": "cde_category",
"action": "files"
tranql_queries: dict = field(
default_factory=lambda: {
"disease": ["disease", "phenotypic_feature"],
"pheno": ["phenotypic_feature", "disease"],
"anat": ["disease", "anatomical_entity"],
"chem_to_disease": ["chemical_entity", "disease"],
"small_molecule_to_disease": ["small_molecule", "disease"],
"chemical_mixture_to_disease": ["chemical_mixture", "disease"],
"phen_to_anat": ["phenotypic_feature", "anatomical_entity"],
}
)

node_to_element_queries: dict = field(
default_factory=lambda: {
# Dug element type to cast the query kg nodes to
"cde": {
# Parse nodes matching criteria in kg
"node_type": "biolink:Publication",
"curie_prefix": "HEALCDE",
# list of attributes that are lists to be cast to strings
"list_field_choose_first": ["files"],
"attribute_mapping": {
# "DugElement Attribute" : "KG Node attribute"
"name": "name",
"desc": "summary",
"collection_name": "cde_category",
"collection_id": "cde_category",
"action": "files",
},
}
}
})
)

concept_expander: dict = field(default_factory=lambda: {
"url": "https://tranql-dev.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
"min_tranql_score": 0.0
})
concept_expander: dict = field(
default_factory=lambda: {
"url": "https://tranql-dev.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
"min_tranql_score": 0.0,
}
)

# List of ontology types that can be used even if they fail normalization
ontology_greenlist: list = field(default_factory=lambda: ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"])
ontology_greenlist: list = field(
default_factory=lambda: [
"PATO",
"CHEBI",
"MONDO",
"UBERON",
"HP",
"MESH",
"UMLS",
]
)

@classmethod
def from_env(cls):
Expand All @@ -107,7 +136,7 @@ def from_env(cls):
"elastic_password": "ELASTIC_PASSWORD",
"redis_host": "REDIS_HOST",
"redis_port": "REDIS_PORT",
"redis_password": "REDIS_PASSWORD"
"redis_password": "REDIS_PASSWORD",
}

kwargs = {}
Expand Down
2 changes: 1 addition & 1 deletion src/dug/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def crawl(self, target_name: str, parser_type: str, annotator_type: str, element

pm = get_plugin_manager()
parser = get_parser(pm.hook, parser_type)
annotator = get_annotator(pm.hook, annotator_type)
annotator = get_annotator(pm.hook, annotator_type, self._factory.config)
targets = get_targets(target_name)

for target in targets:
Expand Down
Loading

0 comments on commit 073aefa

Please sign in to comment.