diff --git a/.github/workflows/build-push-release.yml b/.github/workflows/build-push-release.yml index f23dc15..a383cef 100644 --- a/.github/workflows/build-push-release.yml +++ b/.github/workflows/build-push-release.yml @@ -18,7 +18,7 @@ on: - .dockerignore - .githooks tags-ignore: - - 'v[0-9]+.[0-9]+.*' + - '*' jobs: build-push-release: runs-on: ubuntu-latest diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 0dc8428..401c24c 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -66,45 +66,6 @@ jobs: # flake8 --ignore=E,W --exit-zero . continue-on-error: true -# ############################## build-vuln-test ############################## - # build-vuln-test: - # # needs: flake8-linter - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v3 - # with: - # driver-opts: | - # network=host - - # - name: Login to DockerHub - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKERHUB_USERNAME }} - # password: ${{ secrets.DOCKERHUB_TOKEN }} - # logout: true - - # # Notes on Cache: - # # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - # - name: Build Container - # uses: docker/build-push-action@v5 - # with: - # context: . 
- # push: false - # load: true - # tag: ${{ github.repository }}:vuln-test - # cache-from: type=registry,ref=${{ github.repository }}:buildcache - # cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max - # ####### Run for Fidelity ###### - # - name: Run Trivy vulnerability scanner - # uses: aquasecurity/trivy-action@master - # with: - # image-ref: '${{ github.repository }}:vuln-test' - # severity: 'CRITICAL,HIGH' - # exit-code: '1' - ################################### PYTEST ################################### pytest: runs-on: ubuntu-latest @@ -145,3 +106,47 @@ jobs: - name: Test with Bandit run: | bandit -r src -n3 -lll + +############################## test-image-build ############################## + test-image-build: + runs-on: ubuntu-latest + # if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - uses: actions/checkout@v3 + + - name: Set short git commit SHA + id: vars + run: | + echo "short_sha=$(git rev-parse --short ${{ github.sha }})" >> $GITHUB_OUTPUT + # https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ + + - name: Confirm git commit SHA output + run: echo ${{ steps.vars.outputs.short_sha }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + logout: true + + - name: Parse Github Reference Name + id: branch + run: | + REF=${{ github.ref_name }} + echo "GHR=${REF%/*}" >> $GITHUB_OUTPUT + + # Notes on Cache: + # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache + - name: Build Container + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: | + ${{ github.repository }}:test_${{ steps.branch.outputs.GHR }} + cache-from: type=registry,ref=${{ github.repository }}:buildcache + cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6147d76..e42083a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,7 @@ ###################################################### FROM python:3.12.1-alpine3.19 + # Install required packages RUN apk update && \ apk add g++ make @@ -31,4 +32,4 @@ RUN make install RUN make install.dug # Run it -ENTRYPOINT dug \ No newline at end of file +ENTRYPOINT dug diff --git a/docker-compose.yaml b/docker-compose.yaml index 8e59bd5..8e8d27d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -56,7 +56,7 @@ services: ## ################################################################################# elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2 + image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3 networks: - dug-network environment: diff --git a/requirements.txt b/requirements.txt index 531f5ab..2bbadab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ elasticsearch[async]==8.5.2 gunicorn itsdangerous Jinja2 +jsonpickle jsonschema MarkupSafe ormar @@ -27,4 +28,4 @@ click httpx linkml-runtime==1.6.0 bmt==1.1.0 -urllib3 \ No newline at end of file +urllib3 diff --git a/src/dug/core/annotators/_base.py b/src/dug/core/annotators/_base.py index ea30b4d..0589051 100644 --- a/src/dug/core/annotators/_base.py +++ b/src/dug/core/annotators/_base.py @@ -14,22 +14,33 @@ logging.getLogger("urllib3").setLevel(logging.WARNING) class DugIdentifier: - """ The Dug Identifier is the core piece of information about a concept that produced from a request to an annotator based on a some original source of data. 
- \n The information that is being stored is mostly meant to support the Monarch API but should be adjusted accordingly to suit new Annotators needs in the future. + """Core information about a concept, produced from annotator request + + The Dug Identifier is the core piece of information about a concept that + is produced from a request to an annotator based on some original source of + data. + + \n The information that is being stored is mostly meant to support the + Monarch API but should be adjusted accordingly to suit new Annotators needs + in the future. \n The information that will be needed for all annotators are: \n id: The CURIE identifier \n label: The CURIE identifier \n description: The CURIE identifier - \n When there is another supported Normalizer it will be seperated into a separate plugin like annotator. + \n When there is another supported Normalizer it will be separated into a + separate plugin like annotator. """ + def __init__(self, id, label, types=None, search_text="", description=""): + "Store the given parameters and set the other fields to initial values" + self.id = id self.label = label self.description = description if types is None: types = [] self.types = types - self.search_text = [search_text] if search_text else [] + self.search_text = sorted([search_text]) if search_text else [] self.equivalent_identifiers = [] self.synonyms = [] self.purl = "" @@ -39,12 +50,12 @@ def id_type(self): return self.id.split(":")[0] def add_search_text(self, text): - # Add text only if it's unique and if not empty string + "Add text only if it's unique and if not empty string" if text and text not in self.search_text: - self.search_text.append(text) + self.search_text = sorted(self.search_text + [text]) def get_searchable_dict(self): - # Return a version of the identifier compatible with what's in ElasticSearch + "Return version of identifier compatible with what's in ElasticSearch" es_ident = { "id": self.id, "label": self.label, @@ -55,8 +66,10 @@ def 
get_searchable_dict(self): return es_ident def jsonable(self): + "Output pickleable object (used by utils.complex_handler)" return self.__dict__ + def __str__(self): return json.dumps(self.__dict__, indent=2, default=utils.complex_handler) @@ -81,9 +94,18 @@ def __call__(self, value: Input, http_session: Session) -> Output: class DefaultNormalizer(): - """ After annotation there must be a Noramlizing step to collasce equivalent concepts into one official concept. This is a needed step for the knowledge graph to map between different concepts. - \n The reason why this class in integrated into the annotators.py is because currently there is only one supported Normalizer through the NCATs Translator API. - \n When there is another supported Normalizer it will be seperated into a separate plugin like annotator. + """Default concept normalizer class + + After annotation there must be a Normalizing step to coalesce equivalent + concepts into one official concept. This is a needed step for the knowledge + graph to map between different concepts. + + The reason why this class is integrated into the annotators.py is because + currently there is only one supported Normalizer through the NCATs + Translator API. + + When there is another supported Normalizer it will be separated into a + separate plugin like annotator. 
""" def __init__(self, url): diff --git a/src/dug/core/parsers/_base.py b/src/dug/core/parsers/_base.py index 43a1801..f6d3b77 100644 --- a/src/dug/core/parsers/_base.py +++ b/src/dug/core/parsers/_base.py @@ -29,6 +29,7 @@ def add_concept(self, concept): self.concepts[concept.id] = concept def jsonable(self): + """Output a pickleable object""" return self.__dict__ def get_searchable_dict(self): @@ -55,7 +56,7 @@ def set_search_terms(self): concept.set_search_terms() search_terms.extend(concept.search_terms) search_terms.append(concept.name) - search_terms = list(set(search_terms)) + search_terms = sorted(list(set(search_terms))) self.search_terms = search_terms def set_optional_terms(self): @@ -63,7 +64,7 @@ def set_optional_terms(self): for concept_id, concept in self.concepts.items(): concept.set_optional_terms() optional_terms.extend(concept.optional_terms) - optional_terms = list(set(optional_terms)) + optional_terms = sorted(list(set(optional_terms))) self.optional_terms = optional_terms def __str__(self): @@ -99,15 +100,15 @@ def add_kg_answer(self, answer, query_name): self.kg_answers[answer_id] = answer def clean(self): - self.search_terms = list(set(self.search_terms)) - self.optional_terms = list(set(self.optional_terms)) + self.search_terms = sorted(list(set(self.search_terms))) + self.optional_terms = sorted(list(set(self.optional_terms))) def set_search_terms(self): # Traverse set of identifiers to determine set of search terms search_terms = self.search_terms for ident_id, ident in self.identifiers.items(): search_terms.extend(ident.search_text + ident.synonyms) - self.search_terms = list(set(search_terms)) + self.search_terms = sorted(list(set(search_terms))) def set_optional_terms(self): # Traverse set of knowledge graph answers to determine set of optional search terms @@ -115,7 +116,7 @@ def set_optional_terms(self): for kg_id, kg_answer in self.kg_answers.items(): optional_terms += kg_answer.get_node_names() optional_terms += 
kg_answer.get_node_synonyms() - self.optional_terms = list(set(optional_terms)) + self.optional_terms = sorted(list(set(optional_terms))) def get_searchable_dict(self): # Translate DugConcept into Elastic-Compatible Concept @@ -132,6 +133,7 @@ def get_searchable_dict(self): return es_conc def jsonable(self): + """Output a pickleable object""" return self.__dict__ def __str__(self):