Skip to content

Commit

Permalink
Merge branch 'sort_pickle_lists' into patch-nrslv-resp
Browse files Browse the repository at this point in the history
  • Loading branch information
YaphetKG committed Jan 24, 2024
2 parents d7257df + f3d9411 commit 92cec85
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-push-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
- .dockerignore
- .githooks
tags-ignore:
- 'v[0-9]+.[0-9]+.*'
- '*'
jobs:
build-push-release:
runs-on: ubuntu-latest
Expand Down
83 changes: 44 additions & 39 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,45 +66,6 @@ jobs:
# flake8 --ignore=E,W --exit-zero .
continue-on-error: true

# ############################## build-vuln-test ##############################
# build-vuln-test:
# # needs: flake8-linter
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3

# - name: Set up Docker Buildx
# uses: docker/setup-buildx-action@v3
# with:
# driver-opts: |
# network=host

# - name: Login to DockerHub
# uses: docker/login-action@v3
# with:
# username: ${{ secrets.DOCKERHUB_USERNAME }}
# password: ${{ secrets.DOCKERHUB_TOKEN }}
# logout: true

# # Notes on Cache:
# # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
# - name: Build Container
# uses: docker/build-push-action@v5
# with:
# context: .
# push: false
# load: true
# tag: ${{ github.repository }}:vuln-test
# cache-from: type=registry,ref=${{ github.repository }}:buildcache
# cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
# ####### Run for Fidelity ######
# - name: Run Trivy vulnerability scanner
# uses: aquasecurity/trivy-action@master
# with:
# image-ref: '${{ github.repository }}:vuln-test'
# severity: 'CRITICAL,HIGH'
# exit-code: '1'

################################### PYTEST ###################################
pytest:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -145,3 +106,47 @@ jobs:
- name: Test with Bandit
run: |
bandit -r src -n3 -lll
############################## test-image-build ##############################
test-image-build:
runs-on: ubuntu-latest
# if: ${{ github.actor == 'dependabot[bot]' }}
steps:
- uses: actions/checkout@v3

- name: Set short git commit SHA
id: vars
run: |
echo "short_sha=$(git rev-parse --short ${{ github.sha }})" >> $GITHUB_OUTPUT
# https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/

- name: Confirm git commit SHA output
run: echo ${{ steps.vars.outputs.short_sha }}

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
logout: true

- name: Parse Github Reference Name
id: branch
run: |
REF=${{ github.ref_name }}
echo "GHR=${REF%/*}" >> $GITHUB_OUTPUT
# Notes on Cache:
# https://docs.docker.com/build/ci/github-actions/examples/#inline-cache
- name: Build Container
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: |
${{ github.repository }}:test_${{ steps.branch.outputs.GHR }}
cache-from: type=registry,ref=${{ github.repository }}:buildcache
cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
######################################################
FROM python:3.12.1-alpine3.19


# Install required packages
RUN apk update && \
apk add g++ make
Expand All @@ -31,4 +32,4 @@ RUN make install
RUN make install.dug

# Run it
ENTRYPOINT dug
ENTRYPOINT dug
2 changes: 1 addition & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ services:
##
#################################################################################
elasticsearch:
image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3
networks:
- dug-network
environment:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ elasticsearch[async]==8.5.2
gunicorn
itsdangerous
Jinja2
jsonpickle
jsonschema
MarkupSafe
ormar
Expand All @@ -27,4 +28,4 @@ click
httpx
linkml-runtime==1.6.0
bmt==1.1.0
urllib3
urllib3
42 changes: 32 additions & 10 deletions src/dug/core/annotators/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,33 @@
logging.getLogger("urllib3").setLevel(logging.WARNING)

class DugIdentifier:
""" The Dug Identifier is the core piece of information about a concept that produced from a request to an annotator based on a some original source of data.
\n The information that is being stored is mostly meant to support the Monarch API but should be adjusted accordingly to suit new Annotators needs in the future.
"""Core information about a concept, produced from annotator request
The Dug Identifier is the core piece of information about a concept that
produced from a request to an annotator based on a some original source of
data.
\n The information that is being stored is mostly meant to support the
Monarch API but should be adjusted accordingly to suit new Annotators needs
in the future.
\n The information that will be needed for all annotators are:
\n id: The CURIE identifier
\n label: The label of the concept
\n description: The description of the concept
\n When there is another supported Normalizer it will be separated into a separate plugin like annotator.
\n When there is another supported Normalizer it will be separated into a
separate plugin like annotator.
"""

def __init__(self, id, label, types=None, search_text="", description=""):
"custom init stores parameters to initial values"

self.id = id
self.label = label
self.description = description
if types is None:
types = []
self.types = types
self.search_text = [search_text] if search_text else []
self.search_text = sorted([search_text]) if search_text else []
self.equivalent_identifiers = []
self.synonyms = []
self.purl = ""
Expand All @@ -39,12 +50,12 @@ def id_type(self):
return self.id.split(":")[0]

def add_search_text(self, text):
# Add text only if it's unique and if not empty string
"Add text only if it's unique and if not empty string"
if text and text not in self.search_text:
self.search_text.append(text)
self.search_text = sorted(self.search_text + [text])

def get_searchable_dict(self):
# Return a version of the identifier compatible with what's in ElasticSearch
"Return version of identifier compatible with what's in ElasticSearch"
es_ident = {
"id": self.id,
"label": self.label,
Expand All @@ -55,8 +66,10 @@ def get_searchable_dict(self):
return es_ident

def jsonable(self):
"Output pickleable object (used by utils.complex_handler)"
return self.__dict__


def __str__(self):
return json.dumps(self.__dict__, indent=2, default=utils.complex_handler)

Expand All @@ -81,9 +94,18 @@ def __call__(self, value: Input, http_session: Session) -> Output:


class DefaultNormalizer():
""" After annotation there must be a Noramlizing step to collasce equivalent concepts into one official concept. This is a needed step for the knowledge graph to map between different concepts.
\n The reason why this class in integrated into the annotators.py is because currently there is only one supported Normalizer through the NCATs Translator API.
\n When there is another supported Normalizer it will be seperated into a separate plugin like annotator.
"""Default concept normalizer class
After annotation there must be a Normalizing step to coalesce equivalent
concepts into one official concept. This is a needed step for the knowledge
graph to map between different concepts.
The reason why this class is integrated into the annotators.py is because
currently there is only one supported Normalizer through the NCATs
Translator API.
When there is another supported Normalizer it will be separated into a
separate plugin like annotator.
"""

def __init__(self, url):
Expand Down
14 changes: 8 additions & 6 deletions src/dug/core/parsers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def add_concept(self, concept):
self.concepts[concept.id] = concept

def jsonable(self):
"""Output a pickleable object"""
return self.__dict__

def get_searchable_dict(self):
Expand All @@ -55,15 +56,15 @@ def set_search_terms(self):
concept.set_search_terms()
search_terms.extend(concept.search_terms)
search_terms.append(concept.name)
search_terms = list(set(search_terms))
search_terms = sorted(list(set(search_terms)))
self.search_terms = search_terms

def set_optional_terms(self):
optional_terms = []
for concept_id, concept in self.concepts.items():
concept.set_optional_terms()
optional_terms.extend(concept.optional_terms)
optional_terms = list(set(optional_terms))
optional_terms = sorted(list(set(optional_terms)))
self.optional_terms = optional_terms

def __str__(self):
Expand Down Expand Up @@ -99,23 +100,23 @@ def add_kg_answer(self, answer, query_name):
self.kg_answers[answer_id] = answer

def clean(self):
self.search_terms = list(set(self.search_terms))
self.optional_terms = list(set(self.optional_terms))
self.search_terms = sorted(list(set(self.search_terms)))
self.optional_terms = sorted(list(set(self.optional_terms)))

def set_search_terms(self):
# Traverse set of identifiers to determine set of search terms
search_terms = self.search_terms
for ident_id, ident in self.identifiers.items():
search_terms.extend(ident.search_text + ident.synonyms)
self.search_terms = list(set(search_terms))
self.search_terms = sorted(list(set(search_terms)))

def set_optional_terms(self):
# Traverse set of knowledge graph answers to determine set of optional search terms
optional_terms = self.optional_terms
for kg_id, kg_answer in self.kg_answers.items():
optional_terms += kg_answer.get_node_names()
optional_terms += kg_answer.get_node_synonyms()
self.optional_terms = list(set(optional_terms))
self.optional_terms = sorted(list(set(optional_terms)))

def get_searchable_dict(self):
# Translate DugConcept into Elastic-Compatible Concept
Expand All @@ -132,6 +133,7 @@ def get_searchable_dict(self):
return es_conc

def jsonable(self):
"""Output a pickleable object"""
return self.__dict__

def __str__(self):
Expand Down

0 comments on commit 92cec85

Please sign in to comment.