Skip to content

Commit

Permalink
Merge branch 'sort_pickle_lists' into patch-nrslv-resp
Browse files Browse the repository at this point in the history
  • Loading branch information
YaphetKG committed Jan 24, 2024
2 parents d7257df + f3d9411 commit 92cec85
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-push-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
- .dockerignore
- .githooks
tags-ignore:
- 'v[0-9]+.[0-9]+.*'
- '*'
jobs:
build-push-release:
runs-on: ubuntu-latest
Expand Down
83 changes: 44 additions & 39 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,45 +66,6 @@ jobs:
# flake8 --ignore=E,W --exit-zero .
continue-on-error: true

# ############################## build-vuln-test ##############################
# build-vuln-test:
# # needs: flake8-linter
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3

# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# with:
# driver-opts: |
# network=host

# - name: Login to DockerHub
# uses: docker/login-action@v3
# with:
# username: ${{ secrets.DOCKERHUB_USERNAME }}
# password: ${{ secrets.DOCKERHUB_TOKEN }}
# logout: true

# # Notes on Cache:
# # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
# - name: Build Container
# uses: docker/build-push-action@v5
# with:
# context: .
# push: false
# load: true
# tag: ${{ github.repository }}:vuln-test
# cache-from: type=registry,ref=${{ github.repository }}:buildcache
# cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
# ####### Run for Fidelity ######
# - name: Run Trivy vulnerability scanner
# uses: aquasecurity/trivy-action@master
# with:
# image-ref: '${{ github.repository }}:vuln-test'
# severity: 'CRITICAL,HIGH'
# exit-code: '1'

################################### PYTEST ###################################
pytest:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -145,3 +106,47 @@ jobs:
- name: Test with Bandit
run: |
bandit -r src -n3 -lll
############################## test-image-build ##############################
test-image-build:
runs-on: ubuntu-latest
# if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- uses: actions/checkout@v3

- name: Set short git commit SHA
id: vars
run: |
echo "short_sha=$(git rev-parse --short ${{ github.sha }})" >> $GITHUB_OUTPUT
# https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/

- name: Confirm git commit SHA output
run: echo ${{ steps.vars.outputs.short_sha }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
logout: true

- name: Parse Github Reference Name
id: branch
run: |
REF=${{ github.ref_name }}
echo "GHR=${REF%/*}" >> $GITHUB_OUTPUT
# Notes on Cache:
# https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
- name: Build Container
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: |
${{ github.repository }}:test_${{ steps.branch.outputs.GHR }}
cache-from: type=registry,ref=${{ github.repository }}:buildcache
cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
######################################################
FROM python:3.12.1-alpine3.19


# Install required packages
RUN apk update && \
apk add g++ make
Expand All @@ -31,4 +32,4 @@ RUN make install
RUN make install.dug

# Run it
ENTRYPOINT dug
ENTRYPOINT dug
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ services:
##
#################################################################################
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
networks:
- dug-network
environment:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ elasticsearch[async]==8.5.2
gunicorn
itsdangerous
Jinja2
jsonpickle
jsonschema
MarkupSafe
ormar
Expand All @@ -27,4 +28,4 @@ click
httpx
linkml-runtime==1.6.0
bmt==1.1.0
urllib3
urllib3
42 changes: 32 additions & 10 deletions src/dug/core/annotators/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,33 @@
logging.getLogger("urllib3").setLevel(logging.WARNING)

class DugIdentifier:
""" The Dug Identifier is the core piece of information about a concept that produced from a request to an annotator based on a some original source of data.
\n The information that is being stored is mostly meant to support the Monarch API but should be adjusted accordingly to suit new Annotators needs in the future.
"""Core information about a concept, produced from annotator request
The Dug Identifier is the core piece of information about a concept that
produced from a request to an annotator based on a some original source of
data.
\n The information that is being stored is mostly meant to support the
Monarch API but should be adjusted accordingly to suit new Annotators needs
in the future.
\n The information that will be needed for all annotators are:
\n id: The CURIE identifier
\n label: The label of the concept
\n description: The description of the concept
\n When there is another supported Normalizer it will be separated into a separate plugin like annotator.
\n When there is another supported Normalizer it will be separated into a
separate plugin like annotator.
"""

def __init__(self, id, label, types=None, search_text="", description=""):
"custom init stores parameters to initial values"

self.id = id
self.label = label
self.description = description
if types is None:
types = []
self.types = types
self.search_text = [search_text] if search_text else []
self.search_text = sorted([search_text]) if search_text else []
self.equivalent_identifiers = []
self.synonyms = []
self.purl = ""
Expand All @@ -39,12 +50,12 @@ def id_type(self):
return self.id.split(":")[0]

def add_search_text(self, text):
# Add text only if it's unique and if not empty string
"Add text only if it's unique and if not empty string"
if text and text not in self.search_text:
self.search_text.append(text)
self.search_text = sorted(self.search_text + [text])

def get_searchable_dict(self):
# Return a version of the identifier compatible with what's in ElasticSearch
"Return version of identifier compatible with what's in ElasticSearch"
es_ident = {
"id": self.id,
"label": self.label,
Expand All @@ -55,8 +66,10 @@ def get_searchable_dict(self):
return es_ident

def jsonable(self):
"Output pickleable object (used by utils.complex_handler)"
return self.__dict__


def __str__(self):
return json.dumps(self.__dict__, indent=2, default=utils.complex_handler)

Expand All @@ -81,9 +94,18 @@ def __call__(self, value: Input, http_session: Session) -> Output:


class DefaultNormalizer():
""" After annotation there must be a Noramlizing step to collasce equivalent concepts into one official concept. This is a needed step for the knowledge graph to map between different concepts.
\n The reason why this class in integrated into the annotators.py is because currently there is only one supported Normalizer through the NCATs Translator API.
\n When there is another supported Normalizer it will be seperated into a separate plugin like annotator.
"""Default concept normalizer class
After annotation there must be a Normalizing step to coalesce equivalent
concepts into one official concept. This is a needed step for the knowledge
graph to map between different concepts.
The reason why this class is integrated into the annotators.py is because
currently there is only one supported Normalizer through the NCATs
Translator API.
When there is another supported Normalizer it will be separated into a
separate plugin like annotator.
"""

def __init__(self, url):
Expand Down
14 changes: 8 additions & 6 deletions src/dug/core/parsers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def add_concept(self, concept):
self.concepts[concept.id] = concept

def jsonable(self):
"""Output a pickleable object"""
return self.__dict__

def get_searchable_dict(self):
Expand All @@ -55,15 +56,15 @@ def set_search_terms(self):
concept.set_search_terms()
search_terms.extend(concept.search_terms)
search_terms.append(concept.name)
search_terms = list(set(search_terms))
search_terms = sorted(list(set(search_terms)))
self.search_terms = search_terms

def set_optional_terms(self):
optional_terms = []
for concept_id, concept in self.concepts.items():
concept.set_optional_terms()
optional_terms.extend(concept.optional_terms)
optional_terms = list(set(optional_terms))
optional_terms = sorted(list(set(optional_terms)))
self.optional_terms = optional_terms

def __str__(self):
Expand Down Expand Up @@ -99,23 +100,23 @@ def add_kg_answer(self, answer, query_name):
self.kg_answers[answer_id] = answer

def clean(self):
self.search_terms = list(set(self.search_terms))
self.optional_terms = list(set(self.optional_terms))
self.search_terms = sorted(list(set(self.search_terms)))
self.optional_terms = sorted(list(set(self.optional_terms)))

def set_search_terms(self):
# Traverse set of identifiers to determine set of search terms
search_terms = self.search_terms
for ident_id, ident in self.identifiers.items():
search_terms.extend(ident.search_text + ident.synonyms)
self.search_terms = list(set(search_terms))
self.search_terms = sorted(list(set(search_terms)))

def set_optional_terms(self):
# Traverse set of knowledge graph answers to determine set of optional search terms
optional_terms = self.optional_terms
for kg_id, kg_answer in self.kg_answers.items():
optional_terms += kg_answer.get_node_names()
optional_terms += kg_answer.get_node_synonyms()
self.optional_terms = list(set(optional_terms))
self.optional_terms = sorted(list(set(optional_terms)))

def get_searchable_dict(self):
# Translate DugConcept into Elastic-Compatible Concept
Expand All @@ -132,6 +133,7 @@ def get_searchable_dict(self):
return es_conc

def jsonable(self):
"""Output a pickleable object"""
return self.__dict__

def __str__(self):
Expand Down

0 comments on commit 92cec85

Please sign in to comment.