diff --git a/.github/workflows/build-push-dev-image.yml b/.github/workflows/build-push-dev-image.yml index 24abc615..6951c57e 100644 --- a/.github/workflows/build-push-dev-image.yml +++ b/.github/workflows/build-push-dev-image.yml @@ -48,20 +48,20 @@ jobs: # https://github.com/marketplace/actions/build-and-push-docker-images - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: driver-opts: | network=host - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} logout: true - name: Login to Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: containers.renci.org username: ${{ secrets.CONTAINERHUB_USERNAME }} @@ -72,7 +72,7 @@ jobs: # Notes on Cache: # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - name: Build Push Container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: context: . push: true diff --git a/.github/workflows/build-push-release.yml b/.github/workflows/build-push-release.yml index 06656b6f..a383cef2 100644 --- a/.github/workflows/build-push-release.yml +++ b/.github/workflows/build-push-release.yml @@ -18,7 +18,7 @@ on: - .dockerignore - .githooks tags-ignore: - - 'v[0-9]+.[0-9]+.*' + - '*' jobs: build-push-release: runs-on: ubuntu-latest @@ -63,20 +63,20 @@ jobs: # step # https://github.com/marketplace/actions/build-and-push-docker-images - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 with: driver-opts: | network=host - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} logout: true - name: Login to Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: containers.renci.org username: ${{ secrets.CONTAINERHUB_USERNAME }} @@ -86,7 +86,7 @@ jobs: # Notes on Cache: # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - name: Build Push Container - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v5 with: push: true # Push to renci-registry and dockerhub here. diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 193756d9..401c24cc 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -1,11 +1,9 @@ # Workflow responsible for core acceptance testing. # Tests Currently Run: # - flake8-linter -# - image-build-test -# -# This workflow only validates images can build -# but does not push images to any repository. -# +# - PYTest +# - Bandit +# For PR Vulnerability Scanning a separate workflow will run. # The build-push-dev-image and build-push-release workflows # handle the develop and release image storage respectively. 
# @@ -13,11 +11,17 @@ name: Code-Checks on: - push: - branches-ignore: - - master - - main + # push: + # branches-ignore: + # - master + # - main + # - develop + pull_request: + branches: - develop + - master + - main + types: [ opened, synchronize ] paths-ignore: - README.md - .old_cicd/* @@ -27,13 +31,6 @@ on: - .gitignore - .dockerignore - .githooks - pull_request: - branches: - - develop - - master - - main - types: [ opened, synchronize ] - jobs: ############################## flake8-linter ############################## @@ -45,7 +42,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' # Currently actions/setup-python supports caching # but the cache is not as robust as cache action. @@ -69,35 +66,6 @@ jobs: # flake8 --ignore=E,W --exit-zero . continue-on-error: true -############################## test-image-build ############################## - test-image-build: - # needs: flake8-linter - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - with: - driver-opts: | - network=host - - - name: Login to DockerHub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - logout: true - - # Notes on Cache: - # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache - - name: Build Container - uses: docker/build-push-action@v4 - with: - context: . - push: false - cache-from: type=registry,ref=${{ github.repository }}:buildcache - cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max ################################### PYTEST ################################### pytest: runs-on: ubuntu-latest @@ -106,7 +74,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Requirements run: | @@ -116,8 +84,7 @@ jobs: - name: Test with pytest run: | - pytest --doctest-modules src - coverage run -m pytest tests/unit + make test ############################ Bandit ################################ bandit: @@ -127,7 +94,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Requirements run: | @@ -138,4 +105,48 @@ jobs: # Only report high security issues - name: Test with Bandit run: | - bandit -r src -n3 -lll \ No newline at end of file + bandit -r src -n3 -lll + +############################## test-image-build ############################## + test-image-build: + runs-on: ubuntu-latest + # if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - uses: actions/checkout@v3 + + - name: Set short git commit SHA + id: vars + run: | + echo "short_sha=$(git rev-parse --short ${{ github.sha }})" >> $GITHUB_OUTPUT + # https://github.blog/changelog/2022-10-11-github-actions-deprecating-save-state-and-set-output-commands/ + + - name: Confirm git commit SHA output + run: echo ${{ steps.vars.outputs.short_sha }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + logout: true + + - name: Parse Github Reference Name + id: branch + run: | + REF=${{ github.ref_name }} + echo "GHR=${REF%/*}" >> $GITHUB_OUTPUT + + # Notes on Cache: + # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache + - name: Build Container + uses: 
docker/build-push-action@v5 + with: + context: . + push: true + tags: | + ${{ github.repository }}:test_${{ steps.branch.outputs.GHR }} + cache-from: type=registry,ref=${{ github.repository }}:buildcache + cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max \ No newline at end of file diff --git a/.github/workflows/trivy-pr-scan.yml b/.github/workflows/trivy-pr-scan.yml new file mode 100644 index 00000000..19f86e14 --- /dev/null +++ b/.github/workflows/trivy-pr-scan.yml @@ -0,0 +1,68 @@ + +name: trivy-pr-scan +on: + pull_request: + branches: + - develop + - master + - main + types: [ opened, synchronize ] + paths-ignore: + - README.md + - .old_cicd/* + - .github/* + - .github/workflows/* + - LICENSE + - .gitignore + - .dockerignore + - .githooks + +jobs: + trivy-pr-scan: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + network=host + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + logout: true + + # Notes on Cache: + # https://docs.docker.com/build/ci/github-actions/examples/#inline-cache + - name: Build Container + uses: docker/build-push-action@v5 + with: + context: . + push: false + load: true + tags: ${{ github.repository }}:vuln-test + cache-from: type=registry,ref=${{ github.repository }}:buildcache + cache-to: type=registry,ref=${{ github.repository }}:buildcache,mode=max + + # We will not be concerned with Medium and Low vulnerabilities + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: '${{ github.repository }}:vuln-test' + format: 'sarif' + severity: 'CRITICAL,HIGH' + ignore-unfixed: true + output: 'trivy-results.sarif' + exit-code: '1' + # Scan results should be viewable in GitHub Security Dashboard + # We still fail the job if results are found, so below will always run + # unless manually canceled. + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v2 + if: '!cancelled()' + with: + sarif_file: 'trivy-results.sarif' \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 6f5b10e1..3980ddf1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,19 +3,23 @@ # A container for the core semantic-search capability. # ###################################################### -FROM python:3.10.10-slim +FROM python:3.12.1-alpine3.19 + # Install required packages -RUN apt-get update && \ - apt-get install -y curl make vim && \ - rm -rf /var/cache/apt/* +RUN apk update && \ + apk add g++ make + +#upgrade openssl \ +RUN apk add openssl=3.1.4-r4 +RUN pip install --upgrade pip # Create a non-root user. 
ENV USER dug ENV HOME /home/$USER ENV UID 1000 -RUN adduser --disabled-login --home $HOME --shell /bin/bash --uid $UID $USER +RUN adduser -D --home $HOME --uid $UID $USER USER $USER WORKDIR $HOME @@ -31,4 +35,4 @@ RUN make install RUN make install.dug # Run it -ENTRYPOINT dug \ No newline at end of file +ENTRYPOINT dug diff --git a/Makefile b/Makefile index 2b4a27d1..70dcba67 100644 --- a/Makefile +++ b/Makefile @@ -40,8 +40,6 @@ install.dug: #test: Run all tests test: - # ${PYTHON} -m flake8 src - ${PYTHON} -m pytest --doctest-modules src coverage run -m pytest tests coverage: diff --git a/README.md b/README.md index a992826d..d6692801 100644 --- a/README.md +++ b/README.md @@ -57,13 +57,13 @@ dug crawl tests/integration/data/test_variables_v1.0.csv -p "TOPMedTag" After crawling, you can search: ```shell -dug search -q "heart attack" -t "concepts" -dug search -q "heart attack" -t "variables" -k "concept=MONDO:0005068" +dug search -q "vein" -t "concepts" +dug search -q "vein" -t "variables" -k "concept=UBERON:0001638" ``` You can also query Dug's REST API: ```shell -query="`echo '{"index" : "concepts_index", "query" : "heart attack"}'`" +query="`echo '{"index" : "concepts_index", "query" : "vein"}'`" curl --data "$query" \ --header "Content-Type: application/json" \ @@ -290,3 +290,8 @@ TOPMed phenotypic concept data is [here](https://github.com/helxplatform/dug/tre ## Release To release, commit the change and select feature. + +#### Fail on Vulnerability Detection + +During PRs, several vulnerability scanners are run. If vulnerabilities are detected, the PR checks will fail and a report will be sent to the GitHub Security Dashboard for viewing. Please ensure the vulnerability is mitigated before merging into protected branches. + diff --git a/bin/vlmd_to_dbgap_xml.py b/bin/vlmd_to_dbgap_xml.py index 6263460c..5d2b9d39 100644 --- a/bin/vlmd_to_dbgap_xml.py +++ b/bin/vlmd_to_dbgap_xml.py @@ -161,10 +161,12 @@ def vlmd_to_dbgap_xml(input_file, output, file_format, study_id, appl_id, study_ # description later if that is useful. if row.get('constraints.pattern'): counters['constraints.pattern'] += 1 - logging.warning(f"`constraints.pattern` of {row['constraints.pattern']} found in row {row_index}, skipped.") + logging.warning(f"`constraints.pattern` of {row['constraints.pattern']} found in row {row_index}, " + f"but pattern constraints are not currently being written.") if row.get('format'): counters['format'] += 1 - logging.warning(f"Found `format` of {row['format']} found in row {row_index}, skipped.") + logging.warning(f"`format` of {row['format']} found in row {row_index}, but format is not " + f"currently being written.") # Process enumerated and encoded values. 
encs = {} diff --git a/docker-compose.yaml b/docker-compose.yaml index 8e59bd53..8e8d27d2 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -56,7 +56,7 @@ services: ## ################################################################################# elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2 + image: docker.elastic.co/elasticsearch/elasticsearch:8.11.3 networks: - dug-network environment: diff --git a/requirements.txt b/requirements.txt index 14208fcf..2bbadabe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,29 +1,31 @@ aiohttp asyncio -fastapi==0.95.0 -uvicorn==0.23.2 +fastapi +uvicorn elasticsearch[async]==8.5.2 gunicorn itsdangerous Jinja2 +jsonpickle jsonschema MarkupSafe -ormar==0.12.1 -mistune==2.0.3 -pluggy==1.0.0 -pyrsistent==0.17.3 +ormar +mistune +pluggy +pyrsistent pytest -pytz==2021.1 -PyYAML==6.0 -requests==2.31.0 -# old redis==4.4.2 -redis==4.5.1 -requests-cache==0.9.8 -six==1.16.0 +pytest-asyncio +pytz +PyYAML +requests +redis +requests-cache +six # Click for command line arguments # We use Click 7.0 because that's what one of the pinned packages above use. click -httpx>=0.24.1 +httpx +linkml-runtime==1.6.0 bmt==1.1.0 -urllib3>=1.26.17 \ No newline at end of file +urllib3 diff --git a/setup.cfg b/setup.cfg index cab748f1..0df3d5d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,8 +23,8 @@ install_requires = elasticsearch==8.5.2 pluggy requests - requests_cache==0.9.8 - redis==4.5.1 + requests_cache + redis [options.entry_points] console_scripts = @@ -32,8 +32,8 @@ console_scripts = [options.extras_require] rest = - fastapi==0.95.0 - uvicorn==0.23.2 + fastapi + uvicorn gunicorn jsonschema diff --git a/src/dug/cli.py b/src/dug/cli.py index 0ec6c73f..f211e3a4 100755 --- a/src/dug/cli.py +++ b/src/dug/cli.py @@ -51,6 +51,13 @@ def get_argparser(): required=True ) + crawl_parser.add_argument( + '-a', '--annotator', + help='Annotator used to annotate identifiers in crawl file', + dest="annotator_type", + default="monarch" + ) + crawl_parser.add_argument( '-e', '--element-type', help='[Optional] Coerce all elements to a certain data type (e.g. 
DbGaP Variable).\n' @@ -108,7 +115,7 @@ def crawl(args): config.node_to_element_queries = {} factory = DugFactory(config) dug = Dug(factory) - dug.crawl(args.target, args.parser_type, args.element_type) + dug.crawl(args.target, args.parser_type, args.annotator_type, args.element_type) def search(args): diff --git a/src/dug/config.py b/src/dug/config.py index ba050bbe..b070cac1 100644 --- a/src/dug/config.py +++ b/src/dug/config.py @@ -9,8 +9,9 @@ @dataclass class Config: """ - TODO: Populate description + TODO: Populate description """ + elastic_password: str = "changeme" redis_password: str = "changeme" @@ -27,74 +28,102 @@ class Config: nboost_port: int = 8000 # Preprocessor config that will be passed to annotate.Preprocessor constructor - preprocessor: dict = field(default_factory=lambda: { - "debreviator": { - "BMI": "body mass index" - }, - "stopwords": ["the"] - }) - + preprocessor: dict = field( + default_factory=lambda: { + "debreviator": {"BMI": "body mass index"}, + "stopwords": ["the"], + } + ) + annotator_type: str = "monarch" # Annotator config that will be passed to annotate.Annotator constructor - annotator: dict = field(default_factory=lambda: { - "url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content=" - }) + annotator_args: dict = field( + default_factory=lambda: { + "monarch": { + "url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content=" + }, + "sapbert": { + "classification_url": "https://med-nemo.apps.renci.org/annotate/", + "annotator_url": "https://babel-sapbert.apps.renci.org/annotate/", + }, + } + ) # Normalizer config that will be passed to annotate.Normalizer constructor - normalizer: dict = field(default_factory=lambda: { - "url": "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie=" - }) + normalizer: dict = field( + default_factory=lambda: { + "url": "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie=" + } + ) # Synonym service config that will be passed to annotate.SynonymHelper constructor - synonym_service: dict = field(default_factory=lambda: { - "url": "https://name-resolution-sri.renci.org/reverse_lookup" - }) + synonym_service: dict = field( + default_factory=lambda: { + "url": "https://name-resolution-sri.renci.org/reverse_lookup" + } + ) # Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor - ontology_helper: dict = field(default_factory=lambda: { - "url": "https://api.monarchinitiative.org/api/bioentity/" - }) + ontology_helper: dict = field( + default_factory=lambda: { + "url": "https://api.monarchinitiative.org/api/bioentity/" + } + ) # Redlist of identifiers not to expand via TranQL tranql_exclude_identifiers: list = field(default_factory=lambda: ["CHEBI:17336"]) - tranql_queries: dict = field(default_factory=lambda: { - "disease": ["disease", "phenotypic_feature"], - "pheno": ["phenotypic_feature", "disease"], - "anat": ["disease", "anatomical_entity"], - "chem_to_disease": ["chemical_entity", "disease"], - "small_molecule_to_disease": ["small_molecule", "disease"], - "chemical_mixture_to_disease": ["chemical_mixture", "disease"], - "phen_to_anat": ["phenotypic_feature", "anatomical_entity"], - }) - - node_to_element_queries: dict = 
field(default_factory=lambda: { - # Dug element type to cast the query kg nodes to - "cde": { - # Parse nodes matching criteria in kg - "node_type": "biolink:Publication", - "curie_prefix": "HEALCDE", - # list of attributes that are lists to be casted to strings - "list_field_choose_first": [ - "files" - ], - "attribute_mapping": { - # "DugElement Attribute" : "KG Node attribute" - "name": "name", - "desc": "summary", - "collection_name": "cde_category", - "collection_id": "cde_category", - "collection_action": "files" + tranql_queries: dict = field( + default_factory=lambda: { + "disease": ["disease", "phenotypic_feature"], + "pheno": ["phenotypic_feature", "disease"], + "anat": ["disease", "anatomical_entity"], + "chem_to_disease": ["chemical_entity", "disease"], + "small_molecule_to_disease": ["small_molecule", "disease"], + "chemical_mixture_to_disease": ["chemical_mixture", "disease"], + "phen_to_anat": ["phenotypic_feature", "anatomical_entity"], + } + ) + + node_to_element_queries: dict = field( + default_factory=lambda: { + # Dug element type to cast the query kg nodes to + "cde": { + # Parse nodes matching criteria in kg + "node_type": "biolink:Publication", + "curie_prefix": "HEALCDE", + # list of attributes that are lists to be casted to strings + "list_field_choose_first": ["files"], + "attribute_mapping": { + # "DugElement Attribute" : "KG Node attribute" + "name": "name", + "desc": "summary", + "collection_name": "cde_category", + "collection_id": "cde_category", + "action": "files", + }, } } - }) + ) - concept_expander: dict = field(default_factory=lambda: { - "url": "https://tranql-dev.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false", - "min_tranql_score": 0.0 - }) + concept_expander: dict = field( + default_factory=lambda: { + "url": "https://tranql-dev.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false", + "min_tranql_score": 0.0, + } + ) # List of ontology types that can be used even if they fail normalization - ontology_greenlist: list = field(default_factory=lambda: ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"]) + ontology_greenlist: list = field( + default_factory=lambda: [ + "PATO", + "CHEBI", + "MONDO", + "UBERON", + "HP", + "MESH", + "UMLS", + ] + ) @classmethod def from_env(cls): @@ -107,7 +136,7 @@ def from_env(cls): "elastic_password": "ELASTIC_PASSWORD", "redis_host": "REDIS_HOST", "redis_port": "REDIS_PORT", - "redis_password": "REDIS_PASSWORD" + "redis_password": "REDIS_PASSWORD", } kwargs = {} diff --git a/src/dug/core/__init__.py b/src/dug/core/__init__.py index f1fd8eda..effcb7b9 100644 --- a/src/dug/core/__init__.py +++ b/src/dug/core/__init__.py @@ -12,8 +12,10 @@ from dug import hookspecs from dug.core import parsers +from dug.core import annotators from dug.core.factory import DugFactory from dug.core.parsers import DugConcept, Parser, get_parser +from dug.core.annotators import DugIdentifier, Annotator, get_annotator logger = logging.getLogger('dug') stdout_log_handler = logging.StreamHandler(sys.stdout) @@ -29,6 +31,7 @@ def get_plugin_manager() -> pluggy.PluginManager: pm.add_hookspecs(hookspecs) pm.load_setuptools_entrypoints("dug") pm.register(parsers) + pm.register(annotators) return pm @@ -56,19 +59,20 @@ def __init__(self, factory: DugFactory): ] ) - def crawl(self, target_name: str, parser_type: str, element_type: str = None): + def crawl(self, target_name: str, parser_type: str, annotator_type: str, element_type: str = None): pm = get_plugin_manager() parser = get_parser(pm.hook, 
parser_type) + annotator = get_annotator(pm.hook, annotator_type, self._factory.config) targets = get_targets(target_name) for target in targets: - self._crawl(target, parser, element_type) + self._crawl(target, parser, annotator, element_type) - def _crawl(self, target: Path, parser: Parser, element_type): + def _crawl(self, target: Path, parser: Parser, annotator: Annotator, element_type): # Initialize crawler - crawler = self._factory.build_crawler(target, parser, element_type) + crawler = self._factory.build_crawler(target, parser, annotator, element_type) # Read elements, annotate, and expand using tranql queries crawler.crawl() @@ -93,11 +97,11 @@ def search(self, target, query, **kwargs): event_loop = asyncio.get_event_loop() targets = { 'concepts': partial( - self._search.search_concepts, index=kwargs.get('index', self.concepts_index)), + self._search.search_concepts), 'variables': partial( - self._search.search_variables, index=kwargs.get('index', self.variables_index), concept=kwargs.pop('concept', None)), + self._search.search_variables, concept=kwargs.pop('concept', None)), 'kg': partial( - self._search.search_kg, index=kwargs.get('index', self.kg_index), unique_id=kwargs.pop('unique_id', None)) + self._search.search_kg, unique_id=kwargs.pop('unique_id', None)) } kwargs.pop('index', None) func = targets.get(target) diff --git a/src/dug/core/annotate.py b/src/dug/core/annotate.py deleted file mode 100644 index bbf766b4..00000000 --- a/src/dug/core/annotate.py +++ /dev/null @@ -1,618 +0,0 @@ -import json -import logging -import os -import re -import urllib.parse -from typing import TypeVar, Generic, Union, List, Tuple, Optional -import bmt -import requests -from requests import Session - -import dug.core.tranql as tql - - -logger = logging.getLogger('dug') - -logging.getLogger("requests").setLevel(logging.WARNING) -logging.getLogger("urllib3").setLevel(logging.WARNING) - - -class Identifier: - def __init__(self, id, label, types=None, search_text="", description=""): - self.id = id - self.label = label - self.description = description - if types is None: - types = [] - self.types = types - self.search_text = [search_text] if search_text else [] - self.equivalent_identifiers = [] - self.synonyms = [] - self.purl = "" - - @property - def id_type(self): - return self.id.split(":")[0] - - def add_search_text(self, text): - # Add text only if it's unique and if not empty string - if text and text not in self.search_text: - self.search_text.append(text) - - def get_searchable_dict(self): - # Return a version of the identifier compatible with what's in ElasticSearch - es_ident = { - 'id': self.id, - 'label': self.label, - 'equivalent_identifiers': self.equivalent_identifiers, - 'type': self.types, - 'synonyms': self.synonyms - } - return es_ident - - def jsonable(self): - return self.__dict__ - - -class DugAnnotator: - def __init__( - self, - preprocessor: "Preprocessor", - annotator: "Annotator", - normalizer: "Normalizer", - synonym_finder: "SynonymFinder", - ontology_greenlist=[], - ): - self.preprocessor = preprocessor - self.annotator = annotator - self.normalizer = normalizer - self.synonym_finder = synonym_finder - self.ontology_greenlist = ontology_greenlist - self.norm_fails_file = "norm_fails.txt" - self.anno_fails_file = "anno_fails.txt" - - def annotate(self, text, http_session): - - # Preprocess text (debraviate, remove stopwords, etc.) 
- text = self.preprocessor.preprocess(text) - - # Fetch identifiers - raw_identifiers = self.annotator.annotate(text, http_session) - - # Write out to file if text fails to annotate - if not raw_identifiers: - with open(self.anno_fails_file, "a") as fh: - fh.write(f'{text}\n') - - processed_identifiers = [] - for identifier in raw_identifiers: - - # Normalize identifier using normalization service - norm_id = self.normalizer.normalize(identifier, http_session) - - # Skip adding id if it doesn't normalize - if norm_id is None: - # Write out to file if identifier doesn't normalize - with open(self.norm_fails_file, "a") as fh: - fh.write(f'{identifier.id}\n') - - # Discard non-normalized ident if not in greenlist - if identifier.id_type not in self.ontology_greenlist: - continue - - # If it is in greenlist just keep moving forward - norm_id = identifier - - # Add synonyms to identifier - norm_id.synonyms = self.synonym_finder.get_synonyms(norm_id.id, http_session) - - # Get pURL for ontology identifer for more info - norm_id.purl = BioLinkPURLerizer.get_curie_purl(norm_id.id) - processed_identifiers.append(norm_id) - - return processed_identifiers - - -class ConceptExpander: - def __init__(self, url, min_tranql_score=0.2): - self.url = url - self.min_tranql_score = min_tranql_score - self.include_node_keys = ["id", "name", "synonyms"] - self.include_edge_keys = [] - self.tranql_headers = {"accept": "application/json", "Content-Type": "text/plain"} - - def is_acceptable_answer(self, answer): - return True - - def expand_identifier(self, identifier, query_factory, kg_filename, include_all_attributes=False): - - answer_kgs = [] - - # Skip TranQL query if a file exists in the crawlspace exists already, but continue w/ answers - if os.path.exists(kg_filename): - logger.info(f"identifier {identifier} is already crawled. Skipping TranQL query.") - with open(kg_filename, 'r') as stream: - response = json.load(stream) - else: - query = query_factory.get_query(identifier) - logger.debug(query) - response = requests.post( - url=self.url, - headers=self.tranql_headers, - data=query).json() - - # Case: Skip if empty KG - try: - if response["message"] == 'Internal Server Error' or len(response["message"]["knowledge_graph"]["nodes"]) == 0: - logger.debug(f"Did not find a knowledge graph for {query}") - logger.debug(f"{self.url} returned response: {response}") - return [] - except KeyError as e: - logger.error(f"Could not find key: {e} in response: {response}") - - # Dump out to file if there's a knowledge graph - with open(kg_filename, 'w') as stream: - json.dump(response, stream, indent=2) - - # Get nodes in knowledge graph hashed by ids for easy lookup - noMessage = (len(response.get("message",{})) == 0) - statusError = (response.get("status","") == 'Error') - if noMessage or statusError: - # Skip on error - logger.info(f"Error with identifier: {identifier}, response: {response}, kg_filename: '{kg_filename}'") - return [] - kg = tql.QueryKG(response) - - for answer in kg.answers: - # Filter out answers that don't meet some criteria - # Right now just don't filter anything - logger.debug(f"Answer: {answer}") - if not self.is_acceptable_answer(answer): - logger.warning("Skipping answer as it failed one or more acceptance criteria. 
See log for details.") - continue - - # Get subgraph containing only information for this answer - try: - # Temporarily surround in try/except because sometimes the answer graphs - # contain invalid references to edges/nodes - # This will be fixed in Robokop but for now just silently warn if answer is invalid - node_attributes_filter = None if include_all_attributes else self.include_node_keys - edge_attributes_filter = None if include_all_attributes else self.include_edge_keys - answer_kg = kg.get_answer_subgraph(answer, - include_node_keys=node_attributes_filter, - include_edge_keys=edge_attributes_filter) - - # Add subgraph to list of acceptable answers to query - answer_kgs.append(answer_kg) - - except tql.MissingNodeReferenceError: - # TEMPORARY: Skip answers that have invalid node references - # Need this to be fixed in Robokop - logger.warning("Skipping answer due to presence of non-preferred id! " - "See err msg for details.") - continue - except tql.MissingEdgeReferenceError: - # TEMPORARY: Skip answers that have invalid edge references - # Need this to be fixed in Robokop - logger.warning("Skipping answer due to presence of invalid edge reference! " - "See err msg for details.") - continue - - return answer_kgs - - -class Preprocessor: - """"Class for preprocessing strings so they are better interpreted by NLP steps""" - - def __init__(self, debreviator=None, stopwords=None): - if debreviator is None: - debreviator = self.default_debreviator_factory() - self.decoder = debreviator - - if stopwords is None: - stopwords = [] - self.stopwords = stopwords - - def preprocess(self, text: str) -> str: - """ - Apply debreviator to replace abbreviations and other characters - - >>> pp = Preprocessor({"foo": "bar"}, ["baz"]) - >>> pp.preprocess("Hello foo") - 'Hello bar' - - >>> pp.preprocess("Hello baz world") - 'Hello world' - """ - - for key, value in self.decoder.items(): - text = text.replace(key, value) - - # Remove any stopwords - text = " ".join([word for word in text.split() if word not in self.stopwords]) - return text - - @staticmethod - def default_debreviator_factory(): - return {"bmi": "body mass index", "_": " "} - - -Input = TypeVar("Input") -Output = TypeVar("Output") - - -class ApiClient(Generic[Input, Output]): - - def make_request(self, value: Input, http_session: Session): - raise NotImplementedError() - - def handle_response(self, value, response: Union[dict, list]) -> Output: - raise NotImplementedError() - - def __call__(self, value: Input, http_session: Session) -> Output: - response = self.make_request(value, http_session) - - result = self.handle_response(value, response) - - return result - - -class Annotator(ApiClient[str, List[Identifier]]): - """ - Use monarch API service to fetch ontology IDs found in text - """ - - def __init__(self, url: str): - self.url = url - - def sliding_window(self, text, max_characters=2000, padding_words=5): - """ - For long texts sliding window works as the following - "aaaa bbb ccc ddd eeee" - with a sliding max chars 8 and padding 1 - first yeild would be "aaaa bbb" - next subsequent yeilds "bbb ccc", "ccc ddd" , "ddd eeee" - allowing context to be preserved with the scope of padding - For a text of length 7653 , with max_characters 2000 and padding 5 , 4 chunks are yielded. 
- """ - words = text.split(' ') - total_words = len(words) - window_end = False - current_index = 0 - while not window_end: - current_string = "" - for index, word in enumerate(words[current_index: ]): - if len(current_string) + len(word) + 1 >= max_characters: - yield current_string + " " - current_index += index - padding_words - break - appendee = word if index == 0 else " " + word - current_string += appendee - - if current_index + index == len(words) - 1: - window_end = True - yield current_string - - def annotate(self, text, http_session): - logger.debug(f"Annotating: {text}") - identifiers = [] - for chunk_text in self.sliding_window(text): - identifiers += self(chunk_text, http_session) - return identifiers - - def make_request(self, value: Input, http_session: Session): - value = urllib.parse.quote(value) - url = f'{self.url}{value}' - - # This could be moved to a config file - NUM_TRIES = 5 - for _ in range(NUM_TRIES): - response = http_session.get(url) - if response is not None: - # looks like it worked - break - - # if the reponse is still None here, throw an error - if response is None: - raise RuntimeError(f"no response from {url}") - return response.json() - - def handle_response(self, value, response: dict) -> List[Identifier]: - identifiers = [] - """ Parse each identifier and initialize identifier object """ - for span in response.get('spans', []): - search_text = span.get('text', None) - for token in span.get('token', []): - curie = token.get('id', None) - if not curie: - continue - - biolink_types = token.get('category') - label = token.get('terms')[0] - identifiers.append(Identifier(id=curie, - label=label, - types=biolink_types, - search_text=search_text)) - return identifiers - - -class Normalizer(ApiClient[Identifier, Identifier]): - def __init__(self, url): - self.bl_toolkit = bmt.Toolkit() - self.url = url - - def normalize(self, identifier: Identifier, http_session: Session): - # Use RENCI's normalization API service to get the preferred version of an identifier - logger.debug(f"Normalizing: {identifier.id}") - return self(identifier, http_session) - - def make_request(self, value: Identifier, http_session: Session) -> dict: - curie = value.id - url = f"{self.url}{urllib.parse.quote(curie)}" - try: - response = http_session.get(url) - except Exception as get_exc: - logger.info(f"Error normalizing {value} at {url}") - logger.error(f"Error {get_exc.__class__.__name__}: {get_exc}") - return {} - try: - normalized = response.json() - except Exception as json_exc: - logger.info(f"Error processing response: {response.text} (HTTP {response.status_code})") - logger.error(f"Error {json_exc.__class__.__name__}: {json_exc}") - return {} - - return normalized - - def handle_response(self, identifier: Identifier, normalized: dict) -> Optional[Identifier]: - """ Record normalized results. """ - curie = identifier.id - normalization = normalized.get(curie, {}) - if normalization is None: - logger.info(f"Normalization service did not return normalization for: {curie}") - return None - - preferred_id = normalization.get("id", {}) - equivalent_identifiers = normalization.get("equivalent_identifiers", []) - biolink_type = normalization.get("type", []) - - # Return none if there isn't actually a preferred id - if 'identifier' not in preferred_id: - logger.debug(f"ERROR: normalize({curie})=>({preferred_id}). 
No identifier?") - return None - - logger.debug(f"Preferred id: {preferred_id}") - identifier.id = preferred_id.get('identifier', '') - identifier.label = preferred_id.get('label', '') - identifier.description = preferred_id.get('description', '') - identifier.equivalent_identifiers = [v['identifier'] for v in equivalent_identifiers] - try: - identifier.types = self.bl_toolkit.get_element(biolink_type[0]).name - except: - # converts biolink:SmallMolecule to small molecule - identifier.types = (" ".join(re.split("(?=[A-Z])", biolink_type[0].replace('biolink:', ''))[1:])).lower() - return identifier - - -class SynonymFinder(ApiClient[str, List[str]]): - - def __init__(self, url: str): - self.url = url - - def get_synonyms(self, curie: str, http_session): - ''' - This function uses the NCATS translator service to return a list of synonyms for - curie id - ''' - - return self(curie, http_session) - - def make_request(self, curie: str, http_session: Session): - # Get response from namelookup reverse lookup op - # example (https://name-resolution-sri.renci.org/docs#/lookup/lookup_names_reverse_lookup_post) - url = f"{self.url}" - payload = { - 'curies': [curie] - } - try: - response = http_session.post(url, json=payload) - if str(response.status_code).startswith('4'): - logger.error(f"No synonyms returned for: `{curie}`. Validation error: {response.text}") - return {curie: []} - if str(response.status_code).startswith('5'): - logger.error(f"No synonyms returned for: `{curie}`. Internal server error from {self.url}. Error: {response.text}") - return {curie: []} - return response.json() - except json.decoder.JSONDecodeError as e: - logger.error(f"Json parse error for response from `{url}`. Exception: {str(e)}") - return {curie: []} - - def handle_response(self, curie: str, raw_synonyms: List[dict]) -> List[str]: - # Return curie synonyms - return raw_synonyms.get(curie, []) - - - - - -class BioLinkPURLerizer: - # Static class for the sole purpose of doing lookups of different ontology PURLs - # Is it pretty? No. But it gets the job done. 
- biolink_lookup = {"APO": "http://purl.obolibrary.org/obo/APO_", - "Aeolus": "http://translator.ncats.nih.gov/Aeolus_", - "BIOGRID": "http://identifiers.org/biogrid/", - "BIOSAMPLE": "http://identifiers.org/biosample/", - "BSPO": "http://purl.obolibrary.org/obo/BSPO_", - "CAID": "http://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=", - "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_", - "CHEMBL.COMPOUND": "http://identifiers.org/chembl.compound/", - "CHEMBL.MECHANISM": "https://www.ebi.ac.uk/chembl/mechanism/inspect/", - "CHEMBL.TARGET": "http://identifiers.org/chembl.target/", - "CID": "http://pubchem.ncbi.nlm.nih.gov/compound/", - "CL": "http://purl.obolibrary.org/obo/CL_", - "CLINVAR": "http://identifiers.org/clinvar/", - "CLO": "http://purl.obolibrary.org/obo/CLO_", - "COAR_RESOURCE": "http://purl.org/coar/resource_type/", - "CPT": "https://www.ama-assn.org/practice-management/cpt/", - "CTD": "http://translator.ncats.nih.gov/CTD_", - "ClinVarVariant": "http://www.ncbi.nlm.nih.gov/clinvar/variation/", - "DBSNP": "http://identifiers.org/dbsnp/", - "DGIdb": "https://www.dgidb.org/interaction_types", - "DOID": "http://purl.obolibrary.org/obo/DOID_", - "DRUGBANK": "http://identifiers.org/drugbank/", - "DrugCentral": "http://translator.ncats.nih.gov/DrugCentral_", - "EC": "http://www.enzyme-database.org/query.php?ec=", - "ECTO": "http://purl.obolibrary.org/obo/ECTO_", - "EDAM-DATA": "http://edamontology.org/data_", - "EDAM-FORMAT": "http://edamontology.org/format_", - "EDAM-OPERATION": "http://edamontology.org/operation_", - "EDAM-TOPIC": "http://edamontology.org/topic_", - "EFO": "http://identifiers.org/efo/", - "ENSEMBL": "http://identifiers.org/ensembl/", - "ExO": "http://purl.obolibrary.org/obo/ExO_", - "FAO": "http://purl.obolibrary.org/obo/FAO_", - "FB": "http://identifiers.org/fb/", - "FBcv": "http://purl.obolibrary.org/obo/FBcv_", - "FlyBase": "http://flybase.org/reports/", - "GAMMA": "http://translator.renci.org/GAMMA_", - "GO": "http://purl.obolibrary.org/obo/GO_", - "GOLD.META": "http://identifiers.org/gold.meta/", - "GOP": "http://purl.obolibrary.org/obo/go#", - "GOREL": "http://purl.obolibrary.org/obo/GOREL_", - "GSID": "https://scholar.google.com/citations?user=", - "GTEx": "https://www.gtexportal.org/home/gene/", - "HANCESTRO": "http://www.ebi.ac.uk/ancestro/ancestro_", - "HCPCS": "http://purl.bioontology.org/ontology/HCPCS/", - "HGNC": "http://identifiers.org/hgnc/", - "HGNC.FAMILY": "http://identifiers.org/hgnc.family/", - "HMDB": "http://identifiers.org/hmdb/", - "HP": "http://purl.obolibrary.org/obo/HP_", - "ICD0": "http://translator.ncats.nih.gov/ICD0_", - "ICD10": "http://translator.ncats.nih.gov/ICD10_", - "ICD9": "http://translator.ncats.nih.gov/ICD9_", - "INCHI": "http://identifiers.org/inchi/", - "INCHIKEY": "http://identifiers.org/inchikey/", - "INTACT": "http://identifiers.org/intact/", - "IUPHAR.FAMILY": "http://identifiers.org/iuphar.family/", - "KEGG": "http://identifiers.org/kegg/", - "LOINC": "http://loinc.org/rdf/", - "MEDDRA": "http://identifiers.org/meddra/", - "MESH": "http://identifiers.org/mesh/", - "MGI": "http://identifiers.org/mgi/", - "MI": "http://purl.obolibrary.org/obo/MI_", - "MIR": "http://identifiers.org/mir/", - "MONDO": "http://purl.obolibrary.org/obo/MONDO_", - "MP": "http://purl.obolibrary.org/obo/MP_", - "MSigDB": "https://www.gsea-msigdb.org/gsea/msigdb/", - "MetaCyc": "http://translator.ncats.nih.gov/MetaCyc_", - "NCBIGENE": "http://identifiers.org/ncbigene/", - "NCBITaxon": 
"http://purl.obolibrary.org/obo/NCBITaxon_", - "NCIT": "http://purl.obolibrary.org/obo/NCIT_", - "NDDF": "http://purl.bioontology.org/ontology/NDDF/", - "NLMID": "https://www.ncbi.nlm.nih.gov/nlmcatalog/?term=", - "OBAN": "http://purl.org/oban/", - "OBOREL": "http://purl.obolibrary.org/obo/RO_", - "OIO": "http://www.geneontology.org/formats/oboInOwl#", - "OMIM": "http://purl.obolibrary.org/obo/OMIM_", - "ORCID": "https://orcid.org/", - "ORPHA": "http://www.orpha.net/ORDO/Orphanet_", - "ORPHANET": "http://identifiers.org/orphanet/", - "PANTHER.FAMILY": "http://identifiers.org/panther.family/", - "PANTHER.PATHWAY": "http://identifiers.org/panther.pathway/", - "PATO-PROPERTY": "http://purl.obolibrary.org/obo/pato#", - "PDQ": "https://www.cancer.gov/publications/pdq#", - "PHARMGKB.DRUG": "http://identifiers.org/pharmgkb.drug/", - "PHARMGKB.PATHWAYS": "http://identifiers.org/pharmgkb.pathways/", - "PHAROS": "http://pharos.nih.gov", - "PMID": "http://www.ncbi.nlm.nih.gov/pubmed/", - "PO": "http://purl.obolibrary.org/obo/PO_", - "POMBASE": "http://identifiers.org/pombase/", - "PR": "http://purl.obolibrary.org/obo/PR_", - "PUBCHEM.COMPOUND": "http://identifiers.org/pubchem.compound/", - "PUBCHEM.SUBSTANCE": "http://identifiers.org/pubchem.substance/", - "PathWhiz": "http://smpdb.ca/pathways/#", - "REACT": "http://www.reactome.org/PathwayBrowser/#/", - "REPODB": "http://apps.chiragjpgroup.org/repoDB/", - "RGD": "http://identifiers.org/rgd/", - "RHEA": "http://identifiers.org/rhea/", - "RNACENTRAL": "http://identifiers.org/rnacentral/", - "RO": "http://purl.obolibrary.org/obo/RO_", - "RTXKG1": "http://kg1endpoint.rtx.ai/", - "RXNORM": "http://purl.bioontology.org/ontology/RXNORM/", - "ResearchID": "https://publons.com/researcher/", - "SEMMEDDB": "https://skr3.nlm.nih.gov/SemMedDB", - "SGD": "http://identifiers.org/sgd/", - "SIO": "http://semanticscience.org/resource/SIO_", - "SMPDB": "http://identifiers.org/smpdb/", - "SNOMEDCT": "http://identifiers.org/snomedct/", - "SNPEFF": "http://translator.ncats.nih.gov/SNPEFF_", - "ScopusID": "https://www.scopus.com/authid/detail.uri?authorId=", - "TAXRANK": "http://purl.obolibrary.org/obo/TAXRANK_", - "UBERGRAPH": "http://translator.renci.org/ubergraph-axioms.ofn#", - "UBERON": "http://purl.obolibrary.org/obo/UBERON_", - "UBERON_CORE": "http://purl.obolibrary.org/obo/uberon/core#", - "UMLS": "http://identifiers.org/umls/", - "UMLSSC": "https://metamap.nlm.nih.gov/Docs/SemanticTypes_2018AB.txt/code#", - "UMLSSG": "https://metamap.nlm.nih.gov/Docs/SemGroups_2018.txt/group#", - "UMLSST": "https://metamap.nlm.nih.gov/Docs/SemanticTypes_2018AB.txt/type#", - "UNII": "http://identifiers.org/unii/", - "UPHENO": "http://purl.obolibrary.org/obo/UPHENO_", - "UniProtKB": "http://identifiers.org/uniprot/", - "VANDF": "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF/", - "VMC": "https://github.com/ga4gh/vr-spec/", - "WB": "http://identifiers.org/wb/", - "WBPhenotype": "http://purl.obolibrary.org/obo/WBPhenotype_", - "WBVocab": "http://bio2rdf.org/wormbase_vocabulary", - "WIKIDATA": "https://www.wikidata.org/wiki/", - "WIKIDATA_PROPERTY": "https://www.wikidata.org/wiki/Property:", - "WIKIPATHWAYS": "http://identifiers.org/wikipathways/", - "WormBase": "https://www.wormbase.org/get?name=", - "ZFIN": "http://identifiers.org/zfin/", - "ZP": "http://purl.obolibrary.org/obo/ZP_", - "alliancegenome": "https://www.alliancegenome.org/", - "biolink": "https://w3id.org/biolink/vocab/", - "biolinkml": "https://w3id.org/biolink/biolinkml/", - "chembio": 
"http://translator.ncats.nih.gov/chembio_", - "dcterms": "http://purl.org/dc/terms/", - "dictyBase": "http://dictybase.org/gene/", - "doi": "https://doi.org/", - "fabio": "http://purl.org/spar/fabio/", - "foaf": "http://xmlns.com/foaf/0.1/", - "foodb.compound": "http://foodb.ca/compounds/", - "gff3": "https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#", - "gpi": "https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#", - "gtpo": "https://rdf.guidetopharmacology.org/ns/gtpo#", - "hetio": "http://translator.ncats.nih.gov/hetio_", - "interpro": "https://www.ebi.ac.uk/interpro/entry/", - "isbn": "https://www.isbn-international.org/identifier/", - "isni": "https://isni.org/isni/", - "issn": "https://portal.issn.org/resource/ISSN/", - "medgen": "https://www.ncbi.nlm.nih.gov/medgen/", - "oboformat": "http://www.geneontology.org/formats/oboInOWL#", - "pav": "http://purl.org/pav/", - "prov": "http://www.w3.org/ns/prov#", - "qud": "http://qudt.org/1.1/schema/qudt#", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "skos": "https://www.w3.org/TR/skos-reference/#", - "wgs": "http://www.w3.org/2003/01/geo/wgs84_pos", - "xsd": "http://www.w3.org/2001/XMLSchema#", - "@vocab": "https://w3id.org/biolink/vocab/"} - - @staticmethod - def get_curie_purl(curie): - # Split into prefix and suffix - suffix = curie.split(":")[1] - prefix = curie.split(":")[0] - - # Check to see if the prefix exists in the hash - if prefix not in BioLinkPURLerizer.biolink_lookup: - return None - - return f"{BioLinkPURLerizer.biolink_lookup[prefix]}{suffix}" - - -if __name__ == "__main__": - import doctest - - doctest.testmod() diff --git a/src/dug/core/annotators/__init__.py b/src/dug/core/annotators/__init__.py new file mode 100644 index 00000000..60b43df8 --- /dev/null +++ b/src/dug/core/annotators/__init__.py @@ -0,0 +1,58 @@ +import logging +from typing import Dict + +import pluggy + +from dug.config import Config +from dug.core.annotators._base import DugIdentifier, Indexable, Annotator, DefaultNormalizer, DefaultSynonymFinder +from dug.core.annotators.monarch_annotator import AnnotateMonarch +from dug.core.annotators.sapbert_annotator import AnnotateSapbert + +logger = logging.getLogger('dug') + +hookimpl = pluggy.HookimplMarker("dug") + +@hookimpl +def define_annotators(annotator_dict: Dict[str, Annotator], config: Config): + annotator_dict["monarch"] = build_monarch_annotator("monarch", config=config) + annotator_dict["sapbert"] = build_sapbert_annotator("sapbert", config=config) + + +class AnnotatorNotFoundException(Exception): + ... 
+ + +def get_annotator(hook, annotator_name, config: Config) -> Annotator: + """Get the annotator from all annotators registered via the define_annotators hook""" + + available_annotators = {} + hook.define_annotators(annotator_dict=available_annotators, config=config) + annotator = available_annotators.get(annotator_name.lower()) + if annotator is not None: + logger.info(f'Annotating with {annotator}') + return annotator + + err_msg = f"Cannot find annotator of type '{annotator_name}'\n" \ + f"Supported annotators: {', '.join(available_annotators.keys())}" + logger.error(err_msg) + raise AnnotatorNotFoundException(err_msg) + +def build_monarch_annotator(annotate_type: str, config: Config): + logger.info(f"Building Monarch annotator with args: {config.annotator_args[annotate_type]}") + annotator = AnnotateMonarch( + normalizer=DefaultNormalizer(**config.normalizer), + synonym_finder=DefaultSynonymFinder(**config.synonym_service), + config=config, + **config.annotator_args[annotate_type] + ) + return annotator + +def build_sapbert_annotator(annotate_type, config: Config): + logger.info(f"Building Sapbert annotator with args: {config.annotator_args[annotate_type]}") + annotator = AnnotateSapbert( + normalizer=DefaultNormalizer(**config.normalizer), + synonym_finder=DefaultSynonymFinder(**config.synonym_service), + **config.annotator_args[annotate_type] + ) + return annotator + diff --git a/src/dug/core/annotators/_base.py b/src/dug/core/annotators/_base.py new file mode 100644 index 00000000..05890517 --- /dev/null +++ b/src/dug/core/annotators/_base.py @@ -0,0 +1,233 @@ +import json +import logging +import re +import logging +import urllib.parse +from typing import Union, Callable, Any, Iterable, TypeVar, Generic, List, Optional +from dug import utils as utils +from requests import Session +import bmt + +logger = logging.getLogger("dug") + +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) + +class DugIdentifier: + """Core information about a concept, produced from an annotator request + + The Dug Identifier is the core piece of information about a concept, + produced from a request to an annotator based on some original source of + data. + + \n The information being stored is mostly meant to support the + Monarch API but should be adjusted accordingly to suit new annotators' needs + in the future. + \n The information needed for all annotators is: + \n id: The CURIE identifier + \n label: The human-readable label of the concept + \n description: A text description of the concept + \n When another Normalizer is supported, it will be separated into a + separate plugin, like the annotators. 
+ """ + + def __init__(self, id, label, types=None, search_text="", description=""): + "custom init stores parameters to initial values" + + self.id = id + self.label = label + self.description = description + if types is None: + types = [] + self.types = types + self.search_text = sorted([search_text]) if search_text else [] + self.equivalent_identifiers = [] + self.synonyms = [] + self.purl = "" + + @property + def id_type(self): + return self.id.split(":")[0] + + def add_search_text(self, text): + "Add text only if it's unique and if not empty string" + if text and text not in self.search_text: + self.search_text = sorted(self.search_text + [text]) + + def get_searchable_dict(self): + "Return version of identifier compatible with what's in ElasticSearch" + es_ident = { + "id": self.id, + "label": self.label, + "equivalent_identifiers": self.equivalent_identifiers, + "type": self.types, + "synonyms": self.synonyms, + } + return es_ident + + def jsonable(self): + "Output pickleable object (used by utils.complex_handler)" + return self.__dict__ + + + def __str__(self): + return json.dumps(self.__dict__, indent=2, default=utils.complex_handler) + + +Input = TypeVar("Input") +Output = TypeVar("Output") + + +class AnnotatorSession(Generic[Input, Output]): + def make_request(self, value: Input, http_session: Session): + raise NotImplementedError() + + def handle_response(self, value, response: Union[dict, list]) -> Output: + raise NotImplementedError() + + def __call__(self, value: Input, http_session: Session) -> Output: + response = self.make_request(value, http_session) + + result = self.handle_response(value, response) + + return result + + +class DefaultNormalizer(): + """Default concept normalizer class + + After annotation there must be a Normalizing step to collasce equivalent + concepts into one official concept. This is a needed step for the knowledge + graph to map between different concepts. + + The reason why this class in integrated into the annotators.py is because + currently there is only one supported Normalizer through the NCATs + Translator API. + + When there is another supported Normalizer it will be seperated into a + separate plugin like annotator. 
+ """ + + def __init__(self, url): + self.bl_toolkit = bmt.Toolkit() + self.url = url + + def __call__(self, identifier: DugIdentifier, http_session: Session) -> DugIdentifier: + # Use RENCI's normalization API service to get the preferred version of an identifier + logger.debug(f"Normalizing: {identifier.id}") + response = self.make_request(identifier, http_session) + result = self.handle_response(identifier, response) + return result + + def make_request(self, value: DugIdentifier, http_session: Session) -> dict: + curie = value.id + url = f"{self.url}{urllib.parse.quote(curie)}" + try: + response = http_session.get(url) + except Exception as get_exc: + logger.info(f"Error normalizing {value} at {url}") + logger.error(f"Error {get_exc.__class__.__name__}: {get_exc}") + return {} + try: + normalized = response.json() + except Exception as json_exc: + logger.info( + f"Error processing response: {response.text} (HTTP {response.status_code})" + ) + logger.error(f"Error {json_exc.__class__.__name__}: {json_exc}") + return {} + + return normalized + + def handle_response( + self, identifier: DugIdentifier, normalized: dict + ) -> Optional[DugIdentifier]: + """Record normalized results.""" + curie = identifier.id + normalization = normalized.get(curie, {}) + if normalization is None: + logger.info( + f"Normalization service did not return normalization for: {curie}" + ) + return None + + preferred_id = normalization.get("id", {}) + equivalent_identifiers = normalization.get("equivalent_identifiers", []) + biolink_type = normalization.get("type", []) + + # Return none if there isn't actually a preferred id + if "identifier" not in preferred_id: + logger.debug(f"ERROR: normalize({curie})=>({preferred_id}). No identifier?") + return None + + logger.debug(f"Preferred id: {preferred_id}") + identifier.id = preferred_id.get("identifier", "") + identifier.label = preferred_id.get("label", "") + identifier.description = preferred_id.get("description", "") + identifier.equivalent_identifiers = [ + v["identifier"] for v in equivalent_identifiers + ] + try: + identifier.types = self.bl_toolkit.get_element(biolink_type[0]).name + except: + # converts biolink:SmallMolecule to small molecule + identifier.types = ( + " ".join( + re.split("(?=[A-Z])", biolink_type[0].replace("biolink:", ""))[1:] + ) + ).lower() + return identifier + + +class DefaultSynonymFinder(): + """ The SynonymFinder stores synonyms for concepts in the knowledge graph so users in the Dug User Interface can find concepts that match their search criteria. + \n The reason why this class in integrated into the annotators.py is because currently there is only one supported SynonymFinder through the deployed by RENCI. + \n When there is another supported SynonymFinder it will be seperated into a separate plugin like annotator. 
+ """ + + def __init__(self, url: str): + self.url = url + + # def get_identifier_synonyms + def __call__(self, curie: str, http_session): + """ + This function uses the NCATS translator service to return a list of synonyms for + curie id + """ + response = self.make_request(curie, http_session) + result = self.handle_response(curie, response) + return result + + def make_request(self, curie: str, http_session: Session): + # Get response from namelookup reverse lookup op + # example (https://name-resolution-sri.renci.org/docs#/lookup/lookup_names_reverse_lookup_post) + url = f"{self.url}" + payload = {"curies": [curie]} + try: + response = http_session.post(url, json=payload) + if str(response.status_code).startswith("4"): + logger.error( + f"No synonyms returned for: `{curie}`. Validation error: {response.text}" + ) + return {curie: {"names": []}} + if str(response.status_code).startswith("5"): + logger.error( + f"No synonyms returned for: `{curie}`. Internal server error from {self.url}. Error: {response.text}" + ) + return {curie: {"names": []}} + return response.json() + except json.decoder.JSONDecodeError as e: + logger.error( + f"Json parse error for response from `{url}`. Exception: {str(e)}" + ) + return {curie: {"names": []}} + + def handle_response(self, curie: str, raw_synonyms: List[dict]) -> List[str]: + # Return curie synonyms + return raw_synonyms.get(curie, {}).get('names', []) + + +Indexable = Union[DugIdentifier, AnnotatorSession] +# Indexable = DugIdentifier +Annotator = Callable[[Any], Iterable[Indexable]] +# Annotator = Callable[[Any], Iterable[DugIdentifier]] diff --git a/src/dug/core/annotators/monarch_annotator.py b/src/dug/core/annotators/monarch_annotator.py new file mode 100644 index 00000000..e50e3177 --- /dev/null +++ b/src/dug/core/annotators/monarch_annotator.py @@ -0,0 +1,176 @@ +import logging +import urllib.parse +from typing import List +from requests import Session + +from dug.core.annotators._base import DugIdentifier, Input +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer + +logger = logging.getLogger('dug') + +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) + +class AnnotateMonarch: + """ + Use monarch API service to fetch ontology IDs found in text + """ + def __init__( + self, + normalizer, + synonym_finder, + config, + ontology_greenlist=[], + **kwargs + ): + + self.annotatorUrl = kwargs['url'] + self.normalizer = normalizer + self.synonym_finder = synonym_finder + self.ontology_greenlist = ontology_greenlist + self.norm_fails_file = "norm_fails.txt" + self.anno_fails_file = "anno_fails.txt" + + debreviator = config.preprocessor['debreviator'] if 'debreviator' in config.preprocessor else None + stopwords = config.preprocessor['stopwords'] if 'stopwords' in config.preprocessor else None + + if debreviator is None: + debreviator = self.default_debreviator_factory() + self.decoder = debreviator + + if stopwords is None: + stopwords = [] + self.stopwords = stopwords + + def __call__(self, text, http_session) -> List[DugIdentifier]: + # Preprocess text (debraviate, remove stopwords, etc.) 
+ text = self.preprocess_text(text) + + # Fetch identifiers + raw_identifiers = self.annotate_text(text, http_session) + + # Write out to file if text fails to annotate + if not raw_identifiers: + with open(self.anno_fails_file, "a") as fh: + fh.write(f'{text}\n') + + processed_identifiers = [] + for identifier in raw_identifiers: + + # Normalize identifier using normalization service + norm_id = self.normalizer(identifier, http_session) + + # Skip adding id if it doesn't normalize + if norm_id is None: + # Write out to file if identifier doesn't normalize + with open(self.norm_fails_file, "a") as fh: + fh.write(f'{identifier.id}\n') + + # Discard non-normalized ident if not in greenlist + if identifier.id_type not in self.ontology_greenlist: + continue + + # If it is in greenlist just keep moving forward + norm_id = identifier + + # Add synonyms to identifier + norm_id.synonyms = self.synonym_finder(norm_id.id, http_session) + + # Get pURL for ontology identifier for more info + norm_id.purl = BioLinkPURLerizer.get_curie_purl(norm_id.id) + processed_identifiers.append(norm_id) + + return processed_identifiers + + def sliding_window(self, text, max_characters=2000, padding_words=5): + """ + For long texts, the sliding window works as follows: + "aaaa bbb ccc ddd eeee" + with max_characters 8 and padding_words 1, + the first yield would be "aaaa bbb", + and subsequent yields would be "bbb ccc", "ccc ddd", "ddd eeee", + allowing context to be preserved within the scope of the padding. + For a text of length 7653, with max_characters 2000 and padding_words 5, 4 chunks are yielded. + """ + words = text.split(' ') + total_words = len(words) + window_end = False + current_index = 0 + while not window_end: + current_string = "" + for index, word in enumerate(words[current_index: ]): + if len(current_string) + len(word) + 1 >= max_characters: + yield current_string + " " + current_index += index - padding_words + break + appendee = word if index == 0 else " " + word + current_string += appendee + + if current_index + index == len(words) - 1: + window_end = True + yield current_string + + def annotate_text(self, text, http_session) -> List[DugIdentifier]: + logger.debug(f"Annotating: {text}") + identifiers = [] + for chunk_text in self.sliding_window(text): + response = self.make_request(chunk_text, http_session) + identifiers += self.handle_response(chunk_text, response) + return identifiers + + def make_request(self, value: Input, http_session: Session): + value = urllib.parse.quote(value) + url = f'{self.annotatorUrl}{value}' + + # This could be moved to a config file + NUM_TRIES = 5 + for _ in range(NUM_TRIES): + response = http_session.get(url) + if response is not None: + # looks like it worked + break + # if the response is still None here, raise an error + if response is None: + raise RuntimeError(f"no response from {url}") + return response.json() + + def handle_response(self, value, response: dict) -> List[DugIdentifier]: + """ Parse each identifier and initialize identifier object """ + identifiers = [] + for span in response.get('spans', []): + search_text = span.get('text', None) + for token in span.get('token', []): + curie = token.get('id', None) + if not curie: + continue + + biolink_types = token.get('category') + label = token.get('terms')[0] + identifiers.append(DugIdentifier(id=curie, + label=label, + types=biolink_types, + search_text=search_text)) + return identifiers + + def preprocess_text(self, text: str) -> str: + """ + Apply debreviator to replace abbreviations and other characters + + # >>> pp = 
PreprocessorMonarch({"foo": "bar"}, ["baz"]) + # >>> pp.preprocess("Hello foo") + # 'Hello bar' + + # >>> pp.preprocess("Hello baz world") + 'Hello world' + """ + + for key, value in self.decoder.items(): + text = text.replace(key, value) + + # Remove any stopwords + text = " ".join([word for word in text.split() if word not in self.stopwords]) + return text + + @staticmethod + def default_debreviator_factory(): + return {"bmi": "body mass index", "_": " "} \ No newline at end of file diff --git a/src/dug/core/annotators/sapbert_annotator.py b/src/dug/core/annotators/sapbert_annotator.py new file mode 100644 index 00000000..6f2c93a6 --- /dev/null +++ b/src/dug/core/annotators/sapbert_annotator.py @@ -0,0 +1,248 @@ +import logging +from typing import List +from requests import Session +import json + +from dug.core.annotators._base import DugIdentifier, Input +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer + +logger = logging.getLogger("dug") + +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) + + +class AnnotateSapbert: + """ + Use the RENCI Sapbert API service to fetch ontology IDs found in text + """ + + def __init__( + self, + normalizer, + synonym_finder, + ontology_greenlist=[], + **kwargs + ): + self.classificationUrl = kwargs.get('classification_url') + self.annotatorUrl = kwargs.get('annotator_url') + if not self.classificationUrl: + raise TypeError('Classification url needs to be defined for sapbert annotator') + if not self.annotatorUrl: + raise TypeError('Annotator url needs to be defined for sapbert annotator') + self.normalizer = normalizer + self.synonym_finder = synonym_finder + self.ontology_greenlist = ontology_greenlist + self.norm_fails_file = "norm_fails.txt" + self.anno_fails_file = "anno_fails.txt" + + def __call__(self, text, http_session) -> List[DugIdentifier]: + # Fetch identifiers + classifiers: List = self.text_classification(text, http_session) + + raw_identifiers: List[DugIdentifier] = self.annotate_classifiers( + classifiers, http_session + ) + + # Write out to file if text fails to annotate + if not raw_identifiers: + with open(self.anno_fails_file, "a") as fh: + fh.write(f"{text}\n") + + processed_identifiers = [] + for identifier in raw_identifiers: + # Normalize identifier using normalization service + norm_id = self.normalizer(identifier, http_session) + + # Skip adding id if it doesn't normalize + if norm_id is None: + # Write out to file if identifier doesn't normalize + with open(self.norm_fails_file, "a") as fh: + fh.write(f"{identifier.id}\n") + + # Discard non-normalized ident if not in greenlist + if identifier.id_type not in self.ontology_greenlist: + continue + + # If it is in greenlist just keep moving forward + norm_id = identifier + + # Add synonyms to identifier + norm_id.synonyms = self.synonym_finder(norm_id.id, http_session) + + # Get pURL for ontology identifer for more info + norm_id.purl = BioLinkPURLerizer.get_curie_purl(norm_id.id) + processed_identifiers.append(norm_id) + + return processed_identifiers + + def text_classification(self, text, http_session) -> List: + """ + Send variable text to a token classifier API and return list of classified terms and biolink types + + Param: + text: String -- Full variable text, API does text preprocessing + + Request: + { + "text": "{{text}}", + "model_name": "token_classification" + } + + Response: List of dicts from which we want to extract the following: + { + "obj": "{{Biolink Classification}}", + 
"text": "{{Classified Term}}" + } + + Returns: List Dicts each with a Classified Term and Biolink Classification + """ + logger.debug(f"Classification") + response = self.make_classification_request(text, http_session) + classifiers = self.handle_classification_response(response) + return classifiers + + def make_classification_request(self, text: Input, http_session: Session): + url = self.classificationUrl + logger.debug(f"response from {text}") + payload = { + "text": text, + "model_name": "token_classification", + } + # This could be moved to a config file + NUM_TRIES = 5 + for _ in range(NUM_TRIES): + response = http_session.post(url, json=payload) + if response is not None: + # looks like it worked + break + # if the reponse is still None here, throw an error + if response is None: + raise RuntimeError(f"no response from {url}") + if response.status_code == 403: + raise RuntimeError(f"You are not authorized to use this API -- {url}") + if response.status_code == 500: + raise RuntimeError(f"Classification API is temporarily down -- vist docs here: {url.replace('annotate', 'docs')}") + return response.json() + + def handle_classification_response(self, response: dict) -> List: + classifiers = [] + """ Parse each identifier and initialize identifier object """ + for denotation in response.get("denotations", []): + text = denotation.get("text", None) + bl_type = denotation.get("obj", None) + classifiers.append( + {"text": text, "bl_type": bl_type.replace("biolink:", "")} + ) + return classifiers + + def annotate_classifiers( + self, classifiers: List, http_session + ) -> List[DugIdentifier]: + """ + Send Classified Terms to Sapbert API + + Param: + List: [ + term_dict: Dict { + "text": String -- Classified term received from token classification API + "bl_type": String -- Biolink Classification + } + ] + + Request: + { + "text": "{{term_dict['text']}}", + "model_name": "sapbert", + "count": {{Limits the number of results}}, + "args": { + "bl_type": "{{ term_dict['bl_type'] -- NOTE omit `biolink:`}}" + } + } + + Response: List of dicts with the following structure: + { + "name": "{{Identified Name}}", + "curie": "{{Curie ID}}", + "category": "{{Biolink term with `biolink:`}}", + "score": "{{Float confidence in the annotation}}" + } + TBD: Organize the results by highest score + Return: List of DugIdentifiers with a Curie ID + """ + identifiers = [] + for term_dict in classifiers: + logger.debug(f"Annotating: {term_dict['text']}") + + response = self.make_annotation_request(term_dict, http_session) + identifiers += self.handle_annotation_response(term_dict, response) + + return identifiers + + def make_annotation_request(self, term_dict: Input, http_session: Session): + url = self.annotatorUrl + payload = { + "text": term_dict["text"], + "model_name": "sapbert", + "count": 1000, + "args": {"bl_type": term_dict["bl_type"]}, + } + # This could be moved to a config file + NUM_TRIES = 5 + for _ in range(NUM_TRIES): + response = http_session.post(url, json=payload) + if response is not None: + # looks like it worked + break + # if the reponse is still None here, throw an error + if response is None: + raise RuntimeError(f"no response from {url}") + if response.status_code == 403: + raise RuntimeError(f"You are not authorized to use this API -- {url}") + if response.status_code == 500: + raise RuntimeError(f"Annotation API is temporarily down -- vist docs here: {url.replace('annotate', 'docs')}") + return response.json() + + def handle_annotation_response(self, value, response: dict) -> 
List[DugIdentifier]: + identifiers = [] + """ Parse each identifier and initialize identifier object """ + for identifier in response: + search_text = value.get("text", None) + curie = identifier.get("curie", None) + if not curie: + continue + + biolink_type = identifier.get('category') + score = identifier.get("score", None) + label = identifier.get("name") + identifiers.append( + DugIdentifier(id=curie, label=label, types=[biolink_type], search_text=search_text) + ) + return identifiers + +## Testing Purposes +# if __name__ == "__main__": +# from dug.config import Config +# import json +# import redis +# from requests_cache import CachedSession +# from dug.core.annotators._base import DefaultNormalizer, DefaultSynonymFinder + +# config = Config.from_env() +# annotator = AnnotateSapbert( +# normalizer=DefaultNormalizer(**config.normalizer), +# synonym_finder=DefaultSynonymFinder(**config.synonym_service), +# ) + +# redis_config = { +# "host": "localhost", +# "port": config.redis_port, +# "password": config.redis_password, +# } + +# http_sesh = CachedSession( +# cache_name="annotator", +# backend="redis", +# connection=redis.StrictRedis(**redis_config), +# ) +# annotator(text="Have you ever had a heart attack?", http_session=http_sesh) diff --git a/src/dug/core/annotators/utils/__init__.py b/src/dug/core/annotators/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/dug/core/annotators/utils/biolink_purl_util.py b/src/dug/core/annotators/utils/biolink_purl_util.py new file mode 100644 index 00000000..1cbc8a53 --- /dev/null +++ b/src/dug/core/annotators/utils/biolink_purl_util.py @@ -0,0 +1,175 @@ +class BioLinkPURLerizer: + # Static class for the sole purpose of doing lookups of different ontology PURLs + # Is it pretty? No. But it gets the job done. 
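+ # A short sketch of the intended lookup (the CURIE below is only an example;
+ # get_curie_purl is defined at the end of this class):
+ #
+ #     BioLinkPURLerizer.get_curie_purl("CHEBI:27732")
+ #     # -> "http://purl.obolibrary.org/obo/CHEBI_27732"
+ #     BioLinkPURLerizer.get_curie_purl("UNKNOWN:123")  # prefix not in table -> None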
+ biolink_lookup = {"APO": "http://purl.obolibrary.org/obo/APO_", + "Aeolus": "http://translator.ncats.nih.gov/Aeolus_", + "BIOGRID": "http://identifiers.org/biogrid/", + "BIOSAMPLE": "http://identifiers.org/biosample/", + "BSPO": "http://purl.obolibrary.org/obo/BSPO_", + "CAID": "http://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=", + "CHEBI": "http://purl.obolibrary.org/obo/CHEBI_", + "CHEMBL.COMPOUND": "http://identifiers.org/chembl.compound/", + "CHEMBL.MECHANISM": "https://www.ebi.ac.uk/chembl/mechanism/inspect/", + "CHEMBL.TARGET": "http://identifiers.org/chembl.target/", + "CID": "http://pubchem.ncbi.nlm.nih.gov/compound/", + "CL": "http://purl.obolibrary.org/obo/CL_", + "CLINVAR": "http://identifiers.org/clinvar/", + "CLO": "http://purl.obolibrary.org/obo/CLO_", + "COAR_RESOURCE": "http://purl.org/coar/resource_type/", + "CPT": "https://www.ama-assn.org/practice-management/cpt/", + "CTD": "http://translator.ncats.nih.gov/CTD_", + "ClinVarVariant": "http://www.ncbi.nlm.nih.gov/clinvar/variation/", + "DBSNP": "http://identifiers.org/dbsnp/", + "DGIdb": "https://www.dgidb.org/interaction_types", + "DOID": "http://purl.obolibrary.org/obo/DOID_", + "DRUGBANK": "http://identifiers.org/drugbank/", + "DrugCentral": "http://translator.ncats.nih.gov/DrugCentral_", + "EC": "http://www.enzyme-database.org/query.php?ec=", + "ECTO": "http://purl.obolibrary.org/obo/ECTO_", + "EDAM-DATA": "http://edamontology.org/data_", + "EDAM-FORMAT": "http://edamontology.org/format_", + "EDAM-OPERATION": "http://edamontology.org/operation_", + "EDAM-TOPIC": "http://edamontology.org/topic_", + "EFO": "http://identifiers.org/efo/", + "ENSEMBL": "http://identifiers.org/ensembl/", + "ExO": "http://purl.obolibrary.org/obo/ExO_", + "FAO": "http://purl.obolibrary.org/obo/FAO_", + "FB": "http://identifiers.org/fb/", + "FBcv": "http://purl.obolibrary.org/obo/FBcv_", + "FlyBase": "http://flybase.org/reports/", + "GAMMA": "http://translator.renci.org/GAMMA_", + "GO": "http://purl.obolibrary.org/obo/GO_", + "GOLD.META": "http://identifiers.org/gold.meta/", + "GOP": "http://purl.obolibrary.org/obo/go#", + "GOREL": "http://purl.obolibrary.org/obo/GOREL_", + "GSID": "https://scholar.google.com/citations?user=", + "GTEx": "https://www.gtexportal.org/home/gene/", + "HANCESTRO": "http://www.ebi.ac.uk/ancestro/ancestro_", + "HCPCS": "http://purl.bioontology.org/ontology/HCPCS/", + "HGNC": "http://identifiers.org/hgnc/", + "HGNC.FAMILY": "http://identifiers.org/hgnc.family/", + "HMDB": "http://identifiers.org/hmdb/", + "HP": "http://purl.obolibrary.org/obo/HP_", + "ICD0": "http://translator.ncats.nih.gov/ICD0_", + "ICD10": "http://translator.ncats.nih.gov/ICD10_", + "ICD9": "http://translator.ncats.nih.gov/ICD9_", + "INCHI": "http://identifiers.org/inchi/", + "INCHIKEY": "http://identifiers.org/inchikey/", + "INTACT": "http://identifiers.org/intact/", + "IUPHAR.FAMILY": "http://identifiers.org/iuphar.family/", + "KEGG": "http://identifiers.org/kegg/", + "LOINC": "http://loinc.org/rdf/", + "MEDDRA": "http://identifiers.org/meddra/", + "MESH": "http://identifiers.org/mesh/", + "MGI": "http://identifiers.org/mgi/", + "MI": "http://purl.obolibrary.org/obo/MI_", + "MIR": "http://identifiers.org/mir/", + "MONDO": "http://purl.obolibrary.org/obo/MONDO_", + "MP": "http://purl.obolibrary.org/obo/MP_", + "MSigDB": "https://www.gsea-msigdb.org/gsea/msigdb/", + "MetaCyc": "http://translator.ncats.nih.gov/MetaCyc_", + "NCBIGENE": "http://identifiers.org/ncbigene/", + "NCBITaxon": 
"http://purl.obolibrary.org/obo/NCBITaxon_", + "NCIT": "http://purl.obolibrary.org/obo/NCIT_", + "NDDF": "http://purl.bioontology.org/ontology/NDDF/", + "NLMID": "https://www.ncbi.nlm.nih.gov/nlmcatalog/?term=", + "OBAN": "http://purl.org/oban/", + "OBOREL": "http://purl.obolibrary.org/obo/RO_", + "OIO": "http://www.geneontology.org/formats/oboInOwl#", + "OMIM": "http://purl.obolibrary.org/obo/OMIM_", + "ORCID": "https://orcid.org/", + "ORPHA": "http://www.orpha.net/ORDO/Orphanet_", + "ORPHANET": "http://identifiers.org/orphanet/", + "PANTHER.FAMILY": "http://identifiers.org/panther.family/", + "PANTHER.PATHWAY": "http://identifiers.org/panther.pathway/", + "PATO-PROPERTY": "http://purl.obolibrary.org/obo/pato#", + "PDQ": "https://www.cancer.gov/publications/pdq#", + "PHARMGKB.DRUG": "http://identifiers.org/pharmgkb.drug/", + "PHARMGKB.PATHWAYS": "http://identifiers.org/pharmgkb.pathways/", + "PHAROS": "http://pharos.nih.gov", + "PMID": "http://www.ncbi.nlm.nih.gov/pubmed/", + "PO": "http://purl.obolibrary.org/obo/PO_", + "POMBASE": "http://identifiers.org/pombase/", + "PR": "http://purl.obolibrary.org/obo/PR_", + "PUBCHEM.COMPOUND": "http://identifiers.org/pubchem.compound/", + "PUBCHEM.SUBSTANCE": "http://identifiers.org/pubchem.substance/", + "PathWhiz": "http://smpdb.ca/pathways/#", + "REACT": "http://www.reactome.org/PathwayBrowser/#/", + "REPODB": "http://apps.chiragjpgroup.org/repoDB/", + "RGD": "http://identifiers.org/rgd/", + "RHEA": "http://identifiers.org/rhea/", + "RNACENTRAL": "http://identifiers.org/rnacentral/", + "RO": "http://purl.obolibrary.org/obo/RO_", + "RTXKG1": "http://kg1endpoint.rtx.ai/", + "RXNORM": "http://purl.bioontology.org/ontology/RXNORM/", + "ResearchID": "https://publons.com/researcher/", + "SEMMEDDB": "https://skr3.nlm.nih.gov/SemMedDB", + "SGD": "http://identifiers.org/sgd/", + "SIO": "http://semanticscience.org/resource/SIO_", + "SMPDB": "http://identifiers.org/smpdb/", + "SNOMEDCT": "http://identifiers.org/snomedct/", + "SNPEFF": "http://translator.ncats.nih.gov/SNPEFF_", + "ScopusID": "https://www.scopus.com/authid/detail.uri?authorId=", + "TAXRANK": "http://purl.obolibrary.org/obo/TAXRANK_", + "UBERGRAPH": "http://translator.renci.org/ubergraph-axioms.ofn#", + "UBERON": "http://purl.obolibrary.org/obo/UBERON_", + "UBERON_CORE": "http://purl.obolibrary.org/obo/uberon/core#", + "UMLS": "http://identifiers.org/umls/", + "UMLSSC": "https://metamap.nlm.nih.gov/Docs/SemanticTypes_2018AB.txt/code#", + "UMLSSG": "https://metamap.nlm.nih.gov/Docs/SemGroups_2018.txt/group#", + "UMLSST": "https://metamap.nlm.nih.gov/Docs/SemanticTypes_2018AB.txt/type#", + "UNII": "http://identifiers.org/unii/", + "UPHENO": "http://purl.obolibrary.org/obo/UPHENO_", + "UniProtKB": "http://identifiers.org/uniprot/", + "VANDF": "https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/VANDF/", + "VMC": "https://github.com/ga4gh/vr-spec/", + "WB": "http://identifiers.org/wb/", + "WBPhenotype": "http://purl.obolibrary.org/obo/WBPhenotype_", + "WBVocab": "http://bio2rdf.org/wormbase_vocabulary", + "WIKIDATA": "https://www.wikidata.org/wiki/", + "WIKIDATA_PROPERTY": "https://www.wikidata.org/wiki/Property:", + "WIKIPATHWAYS": "http://identifiers.org/wikipathways/", + "WormBase": "https://www.wormbase.org/get?name=", + "ZFIN": "http://identifiers.org/zfin/", + "ZP": "http://purl.obolibrary.org/obo/ZP_", + "alliancegenome": "https://www.alliancegenome.org/", + "biolink": "https://w3id.org/biolink/vocab/", + "biolinkml": "https://w3id.org/biolink/biolinkml/", + "chembio": 
"http://translator.ncats.nih.gov/chembio_", + "dcterms": "http://purl.org/dc/terms/", + "dictyBase": "http://dictybase.org/gene/", + "doi": "https://doi.org/", + "fabio": "http://purl.org/spar/fabio/", + "foaf": "http://xmlns.com/foaf/0.1/", + "foodb.compound": "http://foodb.ca/compounds/", + "gff3": "https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md#", + "gpi": "https://github.com/geneontology/go-annotation/blob/master/specs/gpad-gpi-2-0.md#", + "gtpo": "https://rdf.guidetopharmacology.org/ns/gtpo#", + "hetio": "http://translator.ncats.nih.gov/hetio_", + "interpro": "https://www.ebi.ac.uk/interpro/entry/", + "isbn": "https://www.isbn-international.org/identifier/", + "isni": "https://isni.org/isni/", + "issn": "https://portal.issn.org/resource/ISSN/", + "medgen": "https://www.ncbi.nlm.nih.gov/medgen/", + "oboformat": "http://www.geneontology.org/formats/oboInOWL#", + "pav": "http://purl.org/pav/", + "prov": "http://www.w3.org/ns/prov#", + "qud": "http://qudt.org/1.1/schema/qudt#", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "skos": "https://www.w3.org/TR/skos-reference/#", + "wgs": "http://www.w3.org/2003/01/geo/wgs84_pos", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "@vocab": "https://w3id.org/biolink/vocab/"} + + @staticmethod + def get_curie_purl(curie): + # Split into prefix and suffix + suffix = curie.split(":")[1] + prefix = curie.split(":")[0] + + # Check to see if the prefix exists in the hash + if prefix not in BioLinkPURLerizer.biolink_lookup: + return None + + return f"{BioLinkPURLerizer.biolink_lookup[prefix]}{suffix}" \ No newline at end of file diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 59f60ba4..b39e6a95 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -50,12 +50,12 @@ def __init__(self, cfg: Config, indices=None): cafile=self._cfg.elastic_ca_path ) self.es = AsyncElasticsearch(hosts=self.hosts, - http_auth=(self._cfg.elastic_username, + basic_auth=(self._cfg.elastic_username, self._cfg.elastic_password), ssl_context=ssl_context) else: self.es = AsyncElasticsearch(hosts=self.hosts, - http_auth=(self._cfg.elastic_username, + basic_auth=(self._cfg.elastic_username, self._cfg.elastic_password)) async def dump_concepts(self, index, query={}, size=None, @@ -651,6 +651,7 @@ async def search_vars_unscored(self, concept="", query="", new_results = new_results[data_type] else: new_results = {} + new_results.update({'total_items': total_items['count']}) return new_results async def search_kg(self, unique_id, query, offset=0, size=None, diff --git a/src/dug/core/concept_expander.py b/src/dug/core/concept_expander.py new file mode 100644 index 00000000..bc8eef50 --- /dev/null +++ b/src/dug/core/concept_expander.py @@ -0,0 +1,99 @@ +import json +import logging +import os +import requests + +import dug.core.tranql as tql + +logger = logging.getLogger('dug') + +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) + +class ConceptExpander: + def __init__(self, url, min_tranql_score=0.2): + self.url = url + self.min_tranql_score = min_tranql_score + self.include_node_keys = ["id", "name", "synonyms"] + self.include_edge_keys = [] + self.tranql_headers = {"accept": "application/json", "Content-Type": "text/plain"} + + def is_acceptable_answer(self, answer): + return True + + def expand_identifier(self, identifier, query_factory, kg_filename, include_all_attributes=False): + 
+ answer_kgs = [] + + # Skip TranQL query if a file exists in the crawlspace exists already, but continue w/ answers + if os.path.exists(kg_filename): + logger.info(f"identifier {identifier} is already crawled. Skipping TranQL query.") + with open(kg_filename, 'r') as stream: + response = json.load(stream) + else: + query = query_factory.get_query(identifier) + logger.debug(query) + response = requests.post( + url=self.url, + headers=self.tranql_headers, + data=query).json() + + # Case: Skip if empty KG + try: + if response["message"] == 'Internal Server Error' or len(response["message"]["knowledge_graph"]["nodes"]) == 0: + logger.debug(f"Did not find a knowledge graph for {query}") + logger.debug(f"{self.url} returned response: {response}") + return [] + except KeyError as e: + logger.error(f"Could not find key: {e} in response: {response}") + + # Dump out to file if there's a knowledge graph + with open(kg_filename, 'w') as stream: + json.dump(response, stream, indent=2) + + # Get nodes in knowledge graph hashed by ids for easy lookup + noMessage = (len(response.get("message",{})) == 0) + statusError = (response.get("status","") == 'Error') + if noMessage or statusError: + # Skip on error + logger.info(f"Error with identifier: {identifier}, response: {response}, kg_filename: '{kg_filename}'") + return [] + kg = tql.QueryKG(response) + + for answer in kg.answers: + # Filter out answers that don't meet some criteria + # Right now just don't filter anything + logger.debug(f"Answer: {answer}") + if not self.is_acceptable_answer(answer): + logger.warning("Skipping answer as it failed one or more acceptance criteria. See log for details.") + continue + + # Get subgraph containing only information for this answer + try: + # Temporarily surround in try/except because sometimes the answer graphs + # contain invalid references to edges/nodes + # This will be fixed in Robokop but for now just silently warn if answer is invalid + node_attributes_filter = None if include_all_attributes else self.include_node_keys + edge_attributes_filter = None if include_all_attributes else self.include_edge_keys + answer_kg = kg.get_answer_subgraph(answer, + include_node_keys=node_attributes_filter, + include_edge_keys=edge_attributes_filter) + + # Add subgraph to list of acceptable answers to query + answer_kgs.append(answer_kg) + + except tql.MissingNodeReferenceError: + # TEMPORARY: Skip answers that have invalid node references + # Need this to be fixed in Robokop + logger.warning("Skipping answer due to presence of non-preferred id! " + "See err msg for details.") + continue + except tql.MissingEdgeReferenceError: + # TEMPORARY: Skip answers that have invalid edge references + # Need this to be fixed in Robokop + logger.warning("Skipping answer due to presence of invalid edge reference! 
" + "See err msg for details.") + continue + + return answer_kgs + \ No newline at end of file diff --git a/src/dug/core/crawler.py b/src/dug/core/crawler.py index 1bb64f0b..ae583550 100644 --- a/src/dug/core/crawler.py +++ b/src/dug/core/crawler.py @@ -2,8 +2,10 @@ import logging import os import traceback +from typing import List from dug.core.parsers import Parser, DugElement, DugConcept +from dug.core.annotators import Annotator, DugIdentifier import dug.core.tranql as tql from dug.utils import biolink_snake_case, get_formatted_biolink_name @@ -11,7 +13,7 @@ class Crawler: - def __init__(self, crawl_file: str, parser: Parser, annotator, + def __init__(self, crawl_file: str, parser: Parser, annotator: Annotator, tranqlizer, tranql_queries, http_session, exclude_identifiers=None, element_type=None, element_extraction=None): @@ -22,7 +24,7 @@ def __init__(self, crawl_file: str, parser: Parser, annotator, self.crawl_file = crawl_file self.parser: Parser = parser self.element_type = element_type - self.annotator = annotator + self.annotator: Annotator = annotator self.tranqlizer = tranqlizer self.tranql_queries = tranql_queries self.http_session = http_session @@ -142,10 +144,14 @@ def annotate_elements(self): def annotate_element(self, element): # Annotate with a set of normalized ontology identifiers - identifiers = self.annotator.annotate(text=element.ml_ready_desc, + # self.DugAnnotator.annotator() + identifiers: List[DugIdentifier] = self.annotator(text=element.ml_ready_desc, http_session=self.http_session) + # Future thoughts... should we be passing in the stpe DugIdentifier here instead? + # Each identifier then becomes a concept that links elements together + logger.info("Got %d identifiers for %s", len(identifiers) , element.ml_ready_desc) for identifier in identifiers: if identifier.id not in self.concepts: # Create concept for newly seen identifier @@ -259,7 +265,7 @@ def expand_to_dug_element(self, for key in attribute_mapping: mapped_value = node.get(attribute_mapping[key], "") # treat all attributes as strings - if key in array_to_string and isinstance(mapped_value, list) and len(mapped_value) > 0: + if attribute_mapping[key] in array_to_string and isinstance(mapped_value, list) and len(mapped_value) > 0: mapped_value = mapped_value[0] element_attribute_args.update({key: mapped_value}) element = DugElement( diff --git a/src/dug/core/factory.py b/src/dug/core/factory.py index d1f594a0..0bedab2a 100644 --- a/src/dug/core/factory.py +++ b/src/dug/core/factory.py @@ -4,15 +4,11 @@ from requests_cache import CachedSession import dug.core.tranql as tql -from dug.core.annotate import (DugAnnotator, - Annotator, - Normalizer, - Preprocessor, - SynonymFinder, - ConceptExpander) +from dug.core.concept_expander import ConceptExpander from dug.config import Config as DugConfig, TRANQL_SOURCE from dug.core.crawler import Crawler from dug.core.parsers import Parser +from dug.core.annotators import Annotator from dug.core.async_search import Search from dug.core.index import Index @@ -36,11 +32,11 @@ def build_http_session(self) -> CachedSession: connection=redis.StrictRedis(**redis_config) ) - def build_crawler(self, target, parser: Parser, element_type: str, tranql_source=None) -> Crawler: + def build_crawler(self, target, parser: Parser, annotator: Annotator, element_type: str, tranql_source=None) -> Crawler: crawler = Crawler( crawl_file=str(target), parser=parser, - annotator=self.build_annotator(), + annotator=annotator, tranqlizer=self.build_tranqlizer(), 
tranql_queries=self.build_tranql_queries(tranql_source), http_session=self.build_http_session(), @@ -51,22 +47,6 @@ def build_crawler(self, target, parser: Parser, element_type: str, tranql_source return crawler - def build_annotator(self) -> DugAnnotator: - - preprocessor = Preprocessor(**self.config.preprocessor) - annotator = Annotator(**self.config.annotator) - normalizer = Normalizer(**self.config.normalizer) - synonym_finder = SynonymFinder(**self.config.synonym_service) - - annotator = DugAnnotator( - preprocessor=preprocessor, - annotator=annotator, - normalizer=normalizer, - synonym_finder=synonym_finder - ) - - return annotator - def build_tranqlizer(self) -> ConceptExpander: return ConceptExpander(**self.config.concept_expander) diff --git a/src/dug/core/index.py b/src/dug/core/index.py index 93a2d585..0491d064 100644 --- a/src/dug/core/index.py +++ b/src/dug/core/index.py @@ -30,12 +30,12 @@ def __init__(self, cfg: Config, indices=None): ) self.es = Elasticsearch( hosts=self.hosts, - http_auth=(self._cfg.elastic_username, self._cfg.elastic_password), + basic_auth=(self._cfg.elastic_username, self._cfg.elastic_password), ssl_context=ssl_context) else: self.es = Elasticsearch( hosts=self.hosts, - http_auth=(self._cfg.elastic_username, self._cfg.elastic_password)) + basic_auth=(self._cfg.elastic_username, self._cfg.elastic_password)) self.replicas = self.get_es_node_count() if self.es.ping(): diff --git a/src/dug/core/parsers/_base.py b/src/dug/core/parsers/_base.py index acfc5bbf..f6d3b770 100644 --- a/src/dug/core/parsers/_base.py +++ b/src/dug/core/parsers/_base.py @@ -29,6 +29,7 @@ def add_concept(self, concept): self.concepts[concept.id] = concept def jsonable(self): + """Output a pickleable object""" return self.__dict__ def get_searchable_dict(self): @@ -55,7 +56,7 @@ def set_search_terms(self): concept.set_search_terms() search_terms.extend(concept.search_terms) search_terms.append(concept.name) - search_terms = list(set(search_terms)) + search_terms = sorted(list(set(search_terms))) self.search_terms = search_terms def set_optional_terms(self): @@ -63,7 +64,7 @@ def set_optional_terms(self): for concept_id, concept in self.concepts.items(): concept.set_optional_terms() optional_terms.extend(concept.optional_terms) - optional_terms = list(set(optional_terms)) + optional_terms = sorted(list(set(optional_terms))) self.optional_terms = optional_terms def __str__(self): @@ -99,15 +100,15 @@ def add_kg_answer(self, answer, query_name): self.kg_answers[answer_id] = answer def clean(self): - self.search_terms = list(set(self.search_terms)) - self.optional_terms = list(set(self.optional_terms)) + self.search_terms = sorted(list(set(self.search_terms))) + self.optional_terms = sorted(list(set(self.optional_terms))) def set_search_terms(self): # Traverse set of identifiers to determine set of search terms search_terms = self.search_terms for ident_id, ident in self.identifiers.items(): search_terms.extend(ident.search_text + ident.synonyms) - self.search_terms = list(set(search_terms)) + self.search_terms = sorted(list(set(search_terms))) def set_optional_terms(self): # Traverse set of knowledge graph answers to determine set of optional search terms @@ -115,7 +116,7 @@ def set_optional_terms(self): for kg_id, kg_answer in self.kg_answers.items(): optional_terms += kg_answer.get_node_names() optional_terms += kg_answer.get_node_synonyms() - self.optional_terms = list(set(optional_terms)) + self.optional_terms = sorted(list(set(optional_terms))) def get_searchable_dict(self): # 
Translate DugConcept into Elastic-Compatible Concept @@ -132,6 +133,7 @@ def get_searchable_dict(self): return es_conc def jsonable(self): + """Output a pickleable object""" return self.__dict__ def __str__(self): @@ -142,4 +144,4 @@ def __str__(self): Parser = Callable[[Any], Iterable[Indexable]] -FileParser = Callable[[InputFile], Iterable[Indexable]] +FileParser = Callable[[InputFile], Iterable[Indexable]] \ No newline at end of file diff --git a/src/dug/core/tranql.py b/src/dug/core/tranql.py index c4c495be..4c458a2a 100644 --- a/src/dug/core/tranql.py +++ b/src/dug/core/tranql.py @@ -113,11 +113,14 @@ def get_node_names(self, include_curie=True): return node_names def get_node_synonyms(self, include_curie=True): + # @TODO call name-resolver node_synonyms = [] curie_ids = self.get_curie_ids() for node in self.get_nodes(): if include_curie or node['id'] not in curie_ids: - node_synonyms += node.get('synonyms') or [] + syn = node.get('synonyms') + if isinstance(syn,list): + node_synonyms += syn return node_synonyms def get_curie_ids(self): diff --git a/src/dug/hookspecs.py b/src/dug/hookspecs.py index 3a02b9a9..9687b15a 100644 --- a/src/dug/hookspecs.py +++ b/src/dug/hookspecs.py @@ -3,6 +3,8 @@ import pluggy from dug.core.parsers import Parser +from dug.core.annotators import Annotator +from dug.config import Config hookspec = pluggy.HookspecMarker("dug") @@ -12,3 +14,9 @@ def define_parsers(parser_dict: Dict[str, Parser]): """Defines what parsers are available to Dug """ ... + +@hookspec +def define_annotators(annotator_dict: Dict[str, Annotator], config: Config): + """Defines what Annotators are available to Dug + """ + ... diff --git a/src/dug/server.py b/src/dug/server.py index fde7e5a0..f7a8466a 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -3,6 +3,7 @@ import uvicorn from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware from dug.config import Config from dug.core.async_search import Search from pydantic import BaseModel @@ -15,6 +16,13 @@ root_path=os.environ.get("ROOT_PATH", "/"), ) +APP.add_middleware( + CORSMiddleware, + allow_origins=['*'], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) class GetFromIndex(BaseModel): index: str = "concepts_index" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 1a6b7da2..50f57877 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,241 @@ from pathlib import Path -TEST_DATA_DIR = Path(__file__).parent.resolve() / 'data' +import json +import urllib.parse +from dataclasses import dataclass +from typing import Dict + +import pytest_asyncio + +TEST_DATA_DIR = Path(__file__).parent.resolve() / "data" + + +@dataclass +class MockResponse: + text: str + status_code: int = 200 + + def json(self): + return json.loads(self.text) + + +class MockApiService: + def __init__(self, urls: Dict[str, list]): + self.urls = urls + + def get(self, url, params: dict = None): + if params: + qstr = urllib.parse.urlencode(params, quote_via=urllib.parse.quote) + url = f"{url}?{qstr}" + + text, status_code = self.urls.get(url) + + if text is None: + return MockResponse(text="{}", status_code=404) + return MockResponse(text, status_code=status_code) + + def post(self, url, params: dict = None, json: dict = {}): + if params: + qstr = urllib.parse.urlencode(params, quote_via=urllib.parse.quote) + url = f"{url}?{qstr}" + text, status_code = self.urls.get(url) + + if text is None: + return MockResponse(text="{}", status_code=404) + return 
MockResponse(text, status_code=status_code) + + +@pytest_asyncio.fixture +def monarch_annotator_api(): + base_url = "http://annotator.api/?content={query}" + + def _(keyword): + return base_url.format(query=urllib.parse.quote(keyword)) + + urls = { + _("heart attack"): [ + json.dumps( + { + "content": "heart attack", + "spans": [ + { + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "UBERON:0007100", + "category": ["anatomical entity"], + "terms": ["primary circulatory organ"], + } + ], + }, + { + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "XAO:0000336", + "category": [], + "terms": ["heart primordium"], + } + ], + }, + ], + } + ), + 200, + ], + } + + return MockApiService( + urls=urls, + ) + + +@pytest_asyncio.fixture +def token_classifier_api(): + return MockApiService( + urls={ + "https://med-nemo.apps.renci.org/annotate/": [ + json.dumps( + { + "text": "Have you ever had a heart attack?", + "denotations": [ + { + "id": "I5-", + "span": {"begin": 20, "end": 32}, + "obj": "biolink:Disease", + "text": "heart attack", + } + ], + } + ), + 200, + ] + } + ) + + +@pytest_asyncio.fixture +def sapbert_annotator_api(): + return MockApiService( + urls={ + "https://med-nemo.apps.renci.org/annotate/": [ + json.dumps( + [ + { + "name": "attack; cardiovascular", + "curie": "UBERON:0007100", + "category": "biolink:Disease", + "score": "0.15857231617", + }, + { + "name": "Angina attack", + "curie": "XAO:0000336", + "category": "biolink:Disease", + "score": "0.206502258778", + }, + ] + ), + 200, + ] + } + ) + + +@pytest_asyncio.fixture +def normalizer_api(): + base_url = "http://normalizer.api/?curie={curie}" + + def _(curie): + return base_url.format( + curie=urllib.parse.quote(curie), + ) + + urls = { + _("UBERON:0007100"): [ + json.dumps( + { + "UBERON:0007100": { + "id": { + "identifier": "UBERON:0007100", + "label": "primary circulatory organ", + }, + "equivalent_identifiers": [ + { + "identifier": "UBERON:0007100", + "label": "primary circulatory organ", + } + ], + "type": [ + "biolink:AnatomicalEntity", + "biolink:OrganismalEntity", + "biolink:BiologicalEntity", + "biolink:NamedThing", + "biolink:Entity", + ], + } + }, + ), + 200, + ], + } + + return MockApiService( + urls=urls, + ) + + +@pytest_asyncio.fixture +def null_normalizer_api(): + base_url = "http://normalizer.api/?curie={curie}" + + def _(curie): + return base_url.format( + curie=urllib.parse.quote(curie), + ) + + urls = { + _("XAO:0000336"): [ + json.dumps( + {"XAO:0000336": None}, + ), + 200, + ], + } + + return MockApiService( + urls=urls, + ) + + +@pytest_asyncio.fixture +def synonym_api(): + return MockApiService( + urls={ + "http://synonyms.api": [ + json.dumps( + { + "UBERON:0007100": { + "names": [ + "primary circulatory organ", + "dorsal tube", + "adult heart", + "heart", + ] + } + } + ), + 200, + ] + } + ) + + +@pytest_asyncio.fixture +def null_synonym_api(): + return MockApiService( + urls={"http://synonyms.api": [json.dumps({"XAO:0000336": {"names":[]}}), 200]} + ) diff --git a/tests/integration/mocks/mock_config.py b/tests/integration/mocks/mock_config.py new file mode 100644 index 00000000..82bcd1b3 --- /dev/null +++ b/tests/integration/mocks/mock_config.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass, field + + +@dataclass +class MockConfig: + + # Preprocessor config that will be passed to annotate.Preprocessor constructor + preprocessor: dict = field(default_factory=lambda: { + "debreviator": { + "BMI": "body mass index" + }, + "stopwords": ["the"] + }) + + + # 
Annotator config that will be passed to annotate.Annotator constructor + annotator_type: str = "monarch" + + annotator_args: dict = field( + default_factory=lambda: { + "monarch": { + "url": "http://annotator.api/?content=" + }, + "sapbert": { + "classification_url": "https://med-nemo.apps.renci.org/annotate/", + "annotator_url": "https://med-nemo.apps.renci.org/annotate/", + }, + } + ) + + # Normalizer config that will be passed to annotate.Normalizer constructor + normalizer: dict = field(default_factory=lambda: { + "url": "http://normalizer.api/?curie=" + }) + + # Synonym service config that will be passed to annotate.SynonymHelper constructor + synonym_service: dict = field(default_factory=lambda: { + "url": "http://synonyms.api" + }) + + @classmethod + def test_from_env(cls): + kwargs = {} + return cls(**kwargs) \ No newline at end of file diff --git a/tests/integration/test_annotators.py b/tests/integration/test_annotators.py new file mode 100644 index 00000000..eecfd1e3 --- /dev/null +++ b/tests/integration/test_annotators.py @@ -0,0 +1,149 @@ +from copy import copy +from typing import List +from attr import field + +import pytest +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer + + +from tests.integration.mocks.mock_config import MockConfig +from dug.core.annotators import ( + DugIdentifier, + AnnotateMonarch, + DefaultNormalizer, + DefaultSynonymFinder, + AnnotateSapbert, +) + + +def test_monarch_annotation_full( + monarch_annotator_api, + normalizer_api, + null_normalizer_api, + synonym_api, + null_synonym_api, +): + cfg = MockConfig.test_from_env() + normalizer = DefaultNormalizer(**cfg.normalizer) + synonym_finder = DefaultSynonymFinder(**cfg.synonym_service) + + annotator = AnnotateMonarch( + normalizer=normalizer, synonym_finder=synonym_finder, config=cfg, **cfg.annotator_args["monarch"] + ) + input_text = "heart attack" + + text = annotator.preprocess_text(input_text) + + # Fetch identifiers + raw_identifiers: List[DugIdentifier] = annotator.annotate_text( + text, monarch_annotator_api + ) + + processed_identifiers: List[DugIdentifier] = [] + for identifier in raw_identifiers: + if identifier.id == "UBERON:0007100": + # Perform normal normalization + output = annotator.normalizer(identifier, normalizer_api) + + assert isinstance(output, DugIdentifier) + assert output.id == "UBERON:0007100" + assert output.label == "primary circulatory organ" + assert output.equivalent_identifiers == ["UBERON:0007100"] + assert output.types == "anatomical entity" + else: + # act as if this is null + output = annotator.normalizer(identifier, null_normalizer_api) + + # Should be returning normalized identifier for each identifier passed in + if output is None: + output = identifier + # Test normalizer when null + assert output.id == "XAO:0000336" + assert output.label == "heart primordium" + + # Add synonyms to identifier + if identifier.id == "UBERON:0007100": + output.synonyms = annotator.synonym_finder(output.id, synonym_api) + print(output.synonyms) + assert output.synonyms == [ + "primary circulatory organ", + "dorsal tube", + "adult heart", + "heart", + ] + else: + output.synonyms = annotator.synonym_finder(output.id, null_synonym_api) + assert output.synonyms == [] + # Get pURL for ontology identifer for more info + output.purl = BioLinkPURLerizer.get_curie_purl(output.id) + processed_identifiers.append(output) + + assert isinstance(processed_identifiers, List) + assert len(processed_identifiers) == 2 + assert isinstance(processed_identifiers[0], DugIdentifier) 
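+
+# The test above exercises the Monarch pipeline step by step (annotate -> normalize ->
+# synonyms -> pURL). Outside of tests the same flow is a single call on the annotator
+# instance; a minimal sketch, assuming a real (e.g. requests_cache) HTTP session rather
+# than the per-stage mocks used here:
+#
+#     annotator = AnnotateMonarch(
+#         normalizer=DefaultNormalizer(**cfg.normalizer),
+#         synonym_finder=DefaultSynonymFinder(**cfg.synonym_service),
+#         config=cfg,
+#         **cfg.annotator_args["monarch"],
+#     )
+#     identifiers = annotator("heart attack", http_session=session)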
+ + +def test_sapbert_annotation_full( + token_classifier_api, + sapbert_annotator_api, + normalizer_api, + null_normalizer_api, + synonym_api, + null_synonym_api, +): + cfg = MockConfig.test_from_env() + normalizer = DefaultNormalizer(**cfg.normalizer) + synonym_finder = DefaultSynonymFinder(**cfg.synonym_service) + + annotator = AnnotateSapbert(normalizer=normalizer, synonym_finder=synonym_finder, **cfg.annotator_args["sapbert"]) + input_text = "Have you ever had a heart attack?" + + # Fetch Classifiers + classifiers: List = annotator.text_classification(input_text, token_classifier_api) + + # Fetch identifiers + raw_identifiers: List[DugIdentifier] = annotator.annotate_classifiers( + classifiers, sapbert_annotator_api + ) + processed_identifiers: List[DugIdentifier] = [] + for identifier in raw_identifiers: + if identifier.id == "UBERON:0007100": + # Perform normal normalization + output = annotator.normalizer(identifier, normalizer_api) + print(output) + + assert isinstance(output, DugIdentifier) + assert output.id == "UBERON:0007100" + assert output.label == "primary circulatory organ" + assert output.equivalent_identifiers == ["UBERON:0007100"] + assert output.types == "anatomical entity" + else: + # act as if this is null + output = annotator.normalizer(identifier, null_normalizer_api) + + # Should be returning normalized identifier for each identifier passed in + if output is None: + output = identifier + # Test normalizer when null + assert output.id == "XAO:0000336" + assert output.label == "Angina attack" + + # Add synonyms to identifier + if identifier.id == "UBERON:0007100": + output.synonyms = annotator.synonym_finder(output.id, synonym_api) + assert output.synonyms == [ + "primary circulatory organ", + "dorsal tube", + "adult heart", + "heart", + ] + else: + output.synonyms = annotator.synonym_finder(output.id, null_synonym_api) + assert output.synonyms == [] + # Get pURL for ontology identifer for more info + output.purl = BioLinkPURLerizer.get_curie_purl(output.id) + processed_identifiers.append(output) + + assert isinstance(processed_identifiers, List) + assert len(processed_identifiers) == 2 + assert isinstance(processed_identifiers[0], DugIdentifier) diff --git a/tests/integration/test_async_search.py b/tests/integration/test_async_search.py index 0ce6cb5c..8e0a65c7 100644 --- a/tests/integration/test_async_search.py +++ b/tests/integration/test_async_search.py @@ -5,12 +5,21 @@ from fastapi.testclient import TestClient from elasticsearch.exceptions import ConnectionError +from dug.config import Config + class APISearchTestCase(TestCase): "API search with mocked elasticsearch" def test_concepts_types_parameter(self): "Test API concepts search with types parameter" - # This should patch the elasticsearch object with the mock + cfg = Config.from_env() + if cfg.elastic_password == "changeme": + # Dummy config is in place, skip the test + self.skipTest( + "For the integration test, a populated elasticsearch " + "instance must be available and configured in the " + "environment variables. 
See dug.config for more.") + from dug.server import APP client = TestClient(APP) types = ['anatomical entity', 'drug'] diff --git a/tests/integration/test_index.py b/tests/integration/test_index.py index 31d0d3db..829e4ba0 100644 --- a/tests/integration/test_index.py +++ b/tests/integration/test_index.py @@ -21,7 +21,7 @@ def is_elastic_up(): try: es = Elasticsearch( hosts=hosts, - http_auth=(username, password) + basic_auth=(username, password) ) return es.ping() except Exception: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index e1b63d9a..87f2edcc 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -3,8 +3,7 @@ from dataclasses import dataclass from typing import Dict -import pytest - +import pytest_asyncio @dataclass class MockResponse: @@ -29,7 +28,7 @@ def get(self, url, params: dict = None): if text is None: return MockResponse(text="{}", status_code=404) return MockResponse(text, status_code=status_code) - + def post(self, url, params: dict = None, json: dict = {}): if params: qstr = urllib.parse.urlencode(params, quote_via=urllib.parse.quote) @@ -41,134 +40,108 @@ def post(self, url, params: dict = None, json: dict = {}): return MockResponse(text, status_code=status_code) -@pytest.fixture +@pytest_asyncio.fixture def annotator_api(): base_url = "http://annotator.api/?content={query}" def _(keyword): - return base_url.format( - query=urllib.parse.quote(keyword) - ) + return base_url.format(query=urllib.parse.quote(keyword)) urls = { - _("heart attack"): [json.dumps({ - "content": "heart attack", - "spans": [ + _("heart attack"): [ + json.dumps( { - "start": 0, - "end": 5, - "text": "heart", - "token": [ + "content": "heart attack", + "spans": [ { - "id": "UBERON:0015230", - "category": [ - "anatomical entity" + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "UBERON:0015230", + "category": ["anatomical entity"], + "terms": ["dorsal vessel heart"], + } ], - "terms": [ - "dorsal vessel heart" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ + }, { - "id": "UBERON:0007100", - "category": [ - "anatomical entity" + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "UBERON:0007100", + "category": ["anatomical entity"], + "terms": ["primary circulatory organ"], + } ], - "terms": [ - "primary circulatory organ" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ + }, { - "id": "UBERON:0015228", - "category": [ - "anatomical entity" + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "UBERON:0015228", + "category": ["anatomical entity"], + "terms": ["circulatory organ"], + } ], - "terms": [ - "circulatory organ" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ + }, { - "id": "ZFA:0000114", - "category": [ - "anatomical entity" + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "ZFA:0000114", + "category": ["anatomical entity"], + "terms": ["heart"], + } ], - "terms": [ - "heart" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ + }, { - "id": "UBERON:0000948", - "category": [ - "anatomical entity" + "start": 0, + "end": 5, + "text": "heart", + "token": [ + { + "id": "UBERON:0000948", + "category": ["anatomical entity"], + "terms": ["heart"], + } ], - "terms": [ - "heart" - ] - } - ] - }, - { - "start": 0, - "end": 12, - "text": "heart attack", - "token": [ + }, { - "id": "MONDO:0005068", - "category": [ - "disease" + "start": 0, + "end": 12, + "text": "heart attack", + 
"token": [ + { + "id": "MONDO:0005068", + "category": ["disease"], + "terms": ["myocardial infarction (disease)"], + } ], - "terms": [ - "myocardial infarction (disease)" - ] - } - ] - }, - { - "start": 0, - "end": 12, - "text": "heart attack", - "token": [ + }, { - "id": "HP:0001658", - "category": [ - "phenotype", - "quality" + "start": 0, + "end": 12, + "text": "heart attack", + "token": [ + { + "id": "HP:0001658", + "category": ["phenotype", "quality"], + "terms": ["Myocardial infarction"], + } ], - "terms": [ - "Myocardial infarction" - ] - } - ] + }, + ], } - ] - }), 200], + ), + 200, + ], } return MockApiService( @@ -176,7 +149,7 @@ def _(keyword): ) -@pytest.fixture +@pytest_asyncio.fixture def normalizer_api(): base_url = "http://normalizer.api/?curie={curie}" @@ -186,30 +159,32 @@ def _(curie): ) urls = { - _("UBERON:0007100"): [json.dumps( - { - "UBERON:0007100": { - "id": { - "identifier": "UBERON:0007100", - "label": "primary circulatory organ" - }, - "equivalent_identifiers": [ - { + _("UBERON:0007100"): [ + json.dumps( + { + "UBERON:0007100": { + "id": { "identifier": "UBERON:0007100", - "label": "primary circulatory organ" - } - ], - "type": [ - "biolink:AnatomicalEntity", - "biolink:OrganismalEntity", - "biolink:BiologicalEntity", - "biolink:NamedThing", - "biolink:Entity" - ] - } - }, - ), 200], - + "label": "primary circulatory organ", + }, + "equivalent_identifiers": [ + { + "identifier": "UBERON:0007100", + "label": "primary circulatory organ", + } + ], + "type": [ + "biolink:AnatomicalEntity", + "biolink:OrganismalEntity", + "biolink:BiologicalEntity", + "biolink:NamedThing", + "biolink:Entity", + ], + } + }, + ), + 200, + ], } return MockApiService( @@ -217,21 +192,30 @@ def _(curie): ) -@pytest.fixture -def synonym_api(): - return MockApiService(urls={ - "http://synonyms.api": [json.dumps({ - "UBERON:0007100": [ - "primary circulatory organ", - "dorsal tube", - "adult heart", - "heart" +@pytest_asyncio.fixture +def synonym_api(): + return MockApiService( + urls={ + "http://synonyms.api": [ + json.dumps( + { + "UBERON:0007100": { + "names": [ + "primary circulatory organ", + "dorsal tube", + "adult heart", + "heart", + ] + } + } + ), + 200, ] - }), 200] - }) + } + ) -@pytest.fixture() +@pytest_asyncio.fixture() def ontology_api(): base_url = "http://ontology.api/?curie={curie}" @@ -240,48 +224,31 @@ def _(curie): curie=urllib.parse.quote(curie), ) - return MockApiService(urls={ - _("UBERON:0007100"): [json.dumps( - { - "taxon": { - "id": None, - "label": None - }, - "association_counts": None, - "xrefs": [ - "SPD:0000130", - "FBbt:00003154", - "TADS:0000147" - ], - "description": "A hollow, muscular organ, which, by contracting rhythmically, keeps up the circulation of the blood or analogs[GO,modified].", - "types": None, - "synonyms": [ - { - "val": "dorsal tube", - "pred": "synonym", - "xrefs": None - }, - { - "val": "adult heart", - "pred": "synonym", - "xrefs": None - }, + return MockApiService( + urls={ + _("UBERON:0007100"): [ + json.dumps( { - "val": "heart", - "pred": "synonym", - "xrefs": None + "taxon": {"id": None, "label": None}, + "association_counts": None, + "xrefs": ["SPD:0000130", "FBbt:00003154", "TADS:0000147"], + "description": "A hollow, muscular organ, which, by contracting rhythmically, keeps up the circulation of the blood or analogs[GO,modified].", + "types": None, + "synonyms": [ + {"val": "dorsal tube", "pred": "synonym", "xrefs": None}, + {"val": "adult heart", "pred": "synonym", "xrefs": None}, + {"val": "heart", "pred": "synonym", 
"xrefs": None}, + ], + "deprecated": None, + "replaced_by": None, + "consider": None, + "id": "UBERON:0007100", + "label": "primary circulatory organ", + "iri": "http://purl.obolibrary.org/obo/UBERON_0007100", + "category": ["anatomical entity"], } - ], - "deprecated": None, - "replaced_by": None, - "consider": None, - "id": "UBERON:0007100", - "label": "primary circulatory organ", - "iri": "http://purl.obolibrary.org/obo/UBERON_0007100", - "category": [ - "anatomical entity" - ] - } - ), 200] - }) - + ), + 200, + ] + } + ) diff --git a/tests/unit/mocks/MockCrawler.py b/tests/unit/mocks/MockCrawler.py index 1c69dabe..2597d777 100644 --- a/tests/unit/mocks/MockCrawler.py +++ b/tests/unit/mocks/MockCrawler.py @@ -5,7 +5,7 @@ import json -from dug.core.annotate import Identifier +from dug.core.annotators import DugIdentifier from dug.core.tranql import QueryFactory, QueryKG # Makes some simple mokes @@ -25,14 +25,14 @@ ExcludedIDs = [] ANNOTATED_IDS = [ - Identifier("MONDO:0", "0", ["disease"]), - Identifier("PUBCHEM.COMPOUND:1", "1", ["chemical"]) + DugIdentifier("MONDO:0", "0", ["disease"]), + DugIdentifier("PUBCHEM.COMPOUND:1", "1", ["chemical"]) ] for ids in ANNOTATED_IDS: ids.type = ids.types[0] # annotator with annotate method returning mocked concepts AnnotatorMock = MagicMock() -AnnotatorMock.annotate = Mock(return_value=ANNOTATED_IDS) +AnnotatorMock = Mock(return_value=ANNOTATED_IDS) # tranqlizer returning mock kg when expanding concepts TranqlizerMock = MagicMock() diff --git a/tests/unit/mocks/data/mock_config.py b/tests/unit/mocks/data/mock_config.py new file mode 100644 index 00000000..d70f8a3a --- /dev/null +++ b/tests/unit/mocks/data/mock_config.py @@ -0,0 +1,43 @@ +from dataclasses import dataclass, field + + +@dataclass +class MockConfig: + + # Preprocessor config that will be passed to annotate.Preprocessor constructor + preprocessor: dict = field(default_factory=lambda: { + "debreviator": { + "BMI": "body mass index" + }, + "stopwords": ["the"] + }) + + # Annotator config that will be passed to annotate.Annotator constructor + annotator_type: str = "monarch" + + annotator_args: dict = field( + default_factory=lambda: { + "monarch": { + "url": "http://annotator.api/?content=" + }, + "sapbert": { + "classification_url": "http://classifier.api/annotate/", + "annotator_url": "http://entity-link.api/annotate/", + }, + } + ) + + # Normalizer config that will be passed to annotate.Normalizer constructor + normalizer: dict = field(default_factory=lambda: { + "url": "http://normalizer.api/?curie=" + }) + + # Synonym service config that will be passed to annotate.SynonymHelper constructor + synonym_service: dict = field(default_factory=lambda: { + "url": "http://synonyms.api" + }) + + @classmethod + def test_from_env(cls): + kwargs = {} + return cls(**kwargs) \ No newline at end of file diff --git a/tests/unit/test_annotate.py b/tests/unit/test_annotate.py deleted file mode 100644 index 87869566..00000000 --- a/tests/unit/test_annotate.py +++ /dev/null @@ -1,244 +0,0 @@ -from copy import copy -from typing import List - -import pytest - -from dug.config import Config -from dug.core.annotate import Identifier, Preprocessor, Annotator, Normalizer, SynonymFinder - - -def test_identifier(): - ident_1 = Identifier( - "PrimaryIdent:1", "first identifier", types=[], search_text="", description="" - ) - - assert "PrimaryIdent" == ident_1.id_type - - -@pytest.mark.parametrize( - "preprocessor,input_text,expected_text", - [ - (Preprocessor(), "Hello_world", "Hello world"), - 
(Preprocessor({"Hello": "Hi"}, ["placeholder"]), "Hello placeholder world", "Hi world"), - ] -) -def test_preprocessor_preprocess(preprocessor, input_text, expected_text): - original_text = copy(input_text) - output_text = preprocessor.preprocess(input_text) - - assert input_text == original_text # Don't modify in-place - assert output_text == expected_text - - -def test_annotator_init(): - cfg = Config.from_env() - url = cfg.annotator["url"] - - annotator = Annotator(**cfg.annotator) - assert annotator.url == url - - -def test_annotator_handle_response(): - annotator = Annotator('foo') - - response = { - "content": "heart attack", - "spans": [ - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ - { - "id": "UBERON:0015230", - "category": [ - "anatomical entity" - ], - "terms": [ - "dorsal vessel heart" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ - { - "id": "UBERON:0007100", - "category": [ - "anatomical entity" - ], - "terms": [ - "primary circulatory organ" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ - { - "id": "UBERON:0015228", - "category": [ - "anatomical entity" - ], - "terms": [ - "circulatory organ" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ - { - "id": "ZFA:0000114", - "category": [ - "anatomical entity" - ], - "terms": [ - "heart" - ] - } - ] - }, - { - "start": 0, - "end": 5, - "text": "heart", - "token": [ - { - "id": "UBERON:0000948", - "category": [ - "anatomical entity" - ], - "terms": [ - "heart" - ] - } - ] - }, - { - "start": 0, - "end": 12, - "text": "heart attack", - "token": [ - { - "id": "MONDO:0005068", - "category": [ - "disease" - ], - "terms": [ - "myocardial infarction (disease)" - ] - } - ] - }, - { - "start": 0, - "end": 12, - "text": "heart attack", - "token": [ - { - "id": "HP:0001658", - "category": [ - "phenotype", - "quality" - ], - "terms": [ - "Myocardial infarction" - ] - } - ] - } - ] - } - - identifiers: List[Identifier] = annotator.handle_response(None, response) - - assert len(identifiers) == 7 - assert isinstance(identifiers[0], Identifier) - - -def test_annotator_call(annotator_api): - url = "http://annotator.api/?content=" - - annotator = Annotator(url) - - text = "heart attack" - identifiers: List[Identifier] = annotator.annotate(text, annotator_api) - - assert len(identifiers) == 7 - assert isinstance(identifiers[0], Identifier) - - -def test_normalizer(normalizer_api): - url = "http://normalizer.api/?curie=" - - identifier = Identifier( - "UBERON:0007100", - label='primary circulatory organ', - types=['anatomical entity'], - description="", - search_text=['heart'], - ) - - normalizer = Normalizer(url) - output = normalizer.normalize(identifier, normalizer_api) - assert isinstance(output, Identifier) - assert output.id == 'UBERON:0007100' - assert output.label == "primary circulatory organ" - assert output.equivalent_identifiers == ['UBERON:0007100'] - assert output.types == 'anatomical entity' - - - -def test_synonym_finder(synonym_api): - curie = "UBERON:0007100" - url = f"http://synonyms.api" - finder = SynonymFinder(url) - result = finder.get_synonyms( - curie, - synonym_api, - ) - assert result == [ - "primary circulatory organ", - "dorsal tube", - "adult heart", - "heart" - ] - - - - - -def test_yield_partial_text(): - annotator = Annotator('foo') - # text contains 800 characters + 9 new lines - text = """COG Protocol number on which the patient was enrolled [901=Trial of mouse monoclonal Anti-GD-2 antibody 14.G2A plus 
IL-2 with or without GM-CSF in children with refractory NBL or melanoma; 911=I-131-MIBG for therapy of advanced neuroblastoma; 914=A dose escalation study of cisplatin, doxorubicin, VP-16, and ifosfamide followed by GM-CSF in advanced NBL and peripheral neuroepithelioma; 925=Study of topotecan; 935=Study of ch14.18 with GM-CSF in children with NBL and other GD2 positive malignancies immediately post ABMT or PBSC; 937=Phase I trial of ZD1694, an inhibitor of thymidylate synthase, in pediatric patients with advanced neoplastic disease; 9709=A phase I study of fenretinide in children with high risk solid tumors; 321P2=New intensive chemotherapy for CCG stage II (with N-myc amplification), stage III and stage IV neuroblastoma; 321P3=Treatment of poor prognosis neuroblastoma before disease progression with intensive multimodal therapy and BMT; 323P=Cyclic combination chemotherapy for newly diagnosed stage III neuroblastoma age 2 and older and stage IV Nneuroblastoma all ages; 3881=Biology and therapy of good, intermediate, and selected poor prognosis neuroblastoma; 3891=Conventional dose chemoradiotherapy vs ablative chemoradiotherapy with autologous BMT for high-risk neuroblastoma; 3951=Phase I pilot study of multiple cycles of high dose chemotherapy with peripheral blood stem cell infusions in advanced stage neuroblastoma.; 4941=National Wilms tumor study V - therapeutic trial & biology study; 8605=Study of the combination of ifosfamide, mesna, and VP-16 in children and young adults with recurrent sarcomas, PNET and other tumors; 8742=Phase III portion of 8741 for neuroblastoma; 9047=Neuroblastoma biology protocol; 9082=Protocol for the development of intervention strategies to reduce the time between symptom onset and diagnosis of childhood cancer -a pediatric oncology group cancer control study; 9140=Therapy for patients with recurrent or refractory neuroblastoma - a phase II study; 9262=A Phase II study of taxol in children with recurrent/refractory soft-tissue sarcoma, rhabdomyosarcoma, osteosarcoma, Ewing's sarcoma, neuroblastoma, germ cell tumors, Wilms' tumor, hepatoblastoma, and hepatocellular carcinoma, a POG study; 9280=Neuroblastoma epidemiology protocol - A Non-Therapeutic Study - A Joint Project of: The University of North Carolina, The Pediatric Oncology Group and The Children's Cancer Study Group; 9340=Treatment of patients >365 days at diagnosis with stage IV NBL: Upfront Phase II Window - A Phase II Study; 9341=Treatment of patients >365 days at diagnosis with stage IV and stage IIB/III (N-myc) NBL - a phase III study; 9342=Neuroblastoma #5, bone marrow transplant - a phase III study; 9343=Interleukin-6 in children receiving autologous bone marrow transplantation for advanced neuroblastoma - a pediatric oncology group phase I trial; 9361=Topotecan in pediatric patients with recurrent or progressive solid tumors - a pediatric oncology group phase II study; 9375=Topotecan plus cyclophosphamide in children with solid tumors - a pediatric oncology group phase I trial; 9464=Cyclophosphamide plus topotecan in children with recurrent or refractory solid tumors - a pediatric oncology group phase II study; 9640=Treatment of patients with high risk neuroblastoma (a feasibility pilot) using two cycles of marrow ablative chemotherapy followed by rescue With peripheral blood stem cells (PBSC), radiation therapy; A3973=A randomized study of purged vs. 
unpurged PBSC transplant following dose intensive induction therapy for high risk NBL; AADM01P1=Protocol for registration and consent to the childhood cancer research network: a limited institution pilot; AAML00P2=A dose finding study of the safety of gemtuzumab ozogamicin combined with conventional chemotherapy for patients with relapsed or refractory acute myeloid leukemia; ACCL0331=A Randomized double blind placebo controlled clinical trial to assess the efficacy of traumeelĀ® S (IND # 66649) for the prevention and treatment of mucositis in children undergoing hematopoietic stem cell transplantation; ACCRN07=Protocol for the enrollment on the official COG registry, The Childhood Cancer Research Network (CCRN); ADVL0018=Phase I study of hu14.18-IL2 fusion protein in patients with refractory neuroblastoma and other refractory GD2 expressing tumors; ADVL0212=A Phase I study of depsipeptide (NSC#630176, IND# 51810) in pediatric patients with refractory solid tumors and leukemias; ADVL0214=A phase I study of single agent OSI-774 (Tarceva) (NSC # 718781, IND #63383) followed by OSI-774 with temozolomide for patients with selected recurrent/refractory solid tumors, including brain tumors; ADVL0215=A phase I study of decitabine in combination with doxorubicin and cyclophosphamide in the treatment of relapsed or refractory solid tumors; ADVL0421=A phase II study of oxaliplatin in children with recurrent solid tumors; ADVL0524=Phase II trial of ixabepilone (BMS-247550), an epothilone B analog, in children and young adults with refractory solid tumors; ADVL0525=A phase II study of pemetrexed in children with recurrent malignancies; ADVL06B1=A pharmacokinetic-pharmacodynamic-pharmacogenetic study of actinomycin-D and vincristine in children with cancer; ADVL0714=A phase I study of VEGF trap (NSC# 724770, IND# 100137) in children with refractory solid tumors; ALTE03N1=Key adverse events after childhood cancer; ALTE05N1=Umbrella long-term follow-up protocol; ANBL0032=Phase III randomized study of chimeric antibody 14.18 (Ch14.18) in high risk neuroblastoma following myeloablative therapy and autologous stem cell rescue; ANBL00B1=Neuroblastoma biology studies; ANBL00P1=A pilot study of tandem high dose chemotherapy with stem cell rescue following induction therapy in children with high risk neuroblastoma; ANBL02P1=A pilot induction regimen incorporating dose-intensive topotecan and cyclophosphamide for treatment of newly diagnosed high risk neuroblastoma; ANBL0321=Phase II study of fenretinide in pediatric patients with resistant or recurrent neuroblastoma; ANBL0322=A phase II study of hu14.18-IL2 (BB-IND-9728) in children with recurrent or refractory neuroblastoma; ANBL0532=Phase III randomized trial of single vs. 
tandem myeloablative as consolidation therapy for high-risk neuroblastoma; ANBL0621=A phase II study of ABT-751, an orally bioavailable tubulin binding agent, in children with relapsed or refractory neuroblastoma; B003=Diagnostic & prognostic studies in NBL; B903=Childhood cancer genetics; B947=Protocol for collection of biology specimens for research studies; B954=Opsoclonus-myoclonus-ataxia syndrome, neuroblastoma and the presence of anti-neuronal antibodies; B973=Laboratory-clinical studies of neuroblastoma; E04=Self-administered epidemiology questionnaire; E18=A case-control study of risk factors for neuroblastoma; I03=Neuroblastoma, diagnostic/prognostic; N891=Parents' perceptions of randomization; P9462=Randomized treatment of recurrent neuroblastoma with topotecan regimens following desferrioxamine (POG only) in an investigational window; P9641=Primary surgical therapy for biologically defined low-risk neuroblastoma; P9761=A phase II trial of irinotecan in children with refractory solid tumors; P9963=A phase II trial of rebeccamycin analogue (NSC #655649) in children with solid tumors; R9702=Prognostic implications of MIBG uptake in patients with neuroblastoma previously treated on CCG-3891; S31=Right atrial catheter study; S921=Comparison of urokinase vs heparin in preventing Infection in central venous devices in children with malignancies]""" - chunks = "" - is_the_beginning = True - max_chars = 2000 - padding_words = 3 - counter = 0 - print(len(text)) - # divvy up into chunks, sum of each chunk should equal the original text. - for chunk in annotator.sliding_window(text=text, max_characters=max_chars, padding_words= padding_words): - assert len(chunk) <= max_chars - counter += 1 - if is_the_beginning: - chunks += chunk - else: - # remove redundand padded words from final result - chunks += " ".join(chunk.split(" ")[padding_words:]) - is_the_beginning = False - - print(counter) - # since spaces are trimmed by tokenizer , we can execuled all spaces and do char - assert chunks == text \ No newline at end of file diff --git a/tests/unit/test_annotators.py b/tests/unit/test_annotators.py new file mode 100644 index 00000000..830a1401 --- /dev/null +++ b/tests/unit/test_annotators.py @@ -0,0 +1,102 @@ +from copy import copy +from typing import List +from attr import field + +import pytest +from dug.core.annotators.utils.biolink_purl_util import BioLinkPURLerizer + +from tests.unit.mocks.data.mock_config import MockConfig +from dug.core.annotators import ( + DugIdentifier, + AnnotateMonarch, + DefaultNormalizer, + DefaultSynonymFinder, +) +from unittest.mock import MagicMock + + +def test_identifier(): + ident_1 = DugIdentifier( + "PrimaryIdent:1", "first identifier", types=[], search_text="", description="" + ) + + assert "PrimaryIdent" == ident_1.id_type + + +def test_annotator(annotator_api): + cfg = MockConfig.test_from_env() + normalizer = DefaultNormalizer(cfg.normalizer) + synonym_finder = DefaultSynonymFinder(cfg.synonym_service) + + annotator = AnnotateMonarch( + normalizer=normalizer, synonym_finder=synonym_finder, config=cfg , **cfg.annotator_args["monarch"] + ) + text = "heart attack" + identifiers: List[DugIdentifier] = annotator.annotate_text( + text, annotator_api + ) + + assert len(identifiers) == 7 + assert isinstance(identifiers[0], DugIdentifier) + + +def test_normalizer(normalizer_api): + url = "http://normalizer.api/?curie=" + + identifier = DugIdentifier( + "UBERON:0007100", + label='primary circulatory organ', + types=['anatomical entity'], + description="", + 
search_text=['heart'], + ) + + normalizer = DefaultNormalizer(url) + output = normalizer(identifier, normalizer_api) + assert isinstance(output, DugIdentifier) + assert output.id == 'UBERON:0007100' + assert output.label == "primary circulatory organ" + assert output.equivalent_identifiers == ['UBERON:0007100'] + assert output.types == 'anatomical entity' + + +def test_synonym_finder(synonym_api): + curie = "UBERON:0007100" + url = f"http://synonyms.api" + finder = DefaultSynonymFinder(url) + result = finder( + curie, + synonym_api, + ) + assert result == [ + "primary circulatory organ", + "dorsal tube", + "adult heart", + "heart" + ] + + +# def test_yield_partial_text(): +# annotator = Annotator('foo') +# # text contains 800 characters + 9 new lines +# text = """COG Protocol number on which the patient was enrolled [901=Trial of mouse monoclonal Anti-GD-2 antibody 14.G2A plus IL-2 with or without GM-CSF in children with refractory NBL or melanoma; 911=I-131-MIBG for therapy of advanced neuroblastoma; 914=A dose escalation study of cisplatin, doxorubicin, VP-16, and ifosfamide followed by GM-CSF in advanced NBL and peripheral neuroepithelioma; 925=Study of topotecan; 935=Study of ch14.18 with GM-CSF in children with NBL and other GD2 positive malignancies immediately post ABMT or PBSC; 937=Phase I trial of ZD1694, an inhibitor of thymidylate synthase, in pediatric patients with advanced neoplastic disease; 9709=A phase I study of fenretinide in children with high risk solid tumors; 321P2=New intensive chemotherapy for CCG stage II (with N-myc amplification), stage III and stage IV neuroblastoma; 321P3=Treatment of poor prognosis neuroblastoma before disease progression with intensive multimodal therapy and BMT; 323P=Cyclic combination chemotherapy for newly diagnosed stage III neuroblastoma age 2 and older and stage IV Nneuroblastoma all ages; 3881=Biology and therapy of good, intermediate, and selected poor prognosis neuroblastoma; 3891=Conventional dose chemoradiotherapy vs ablative chemoradiotherapy with autologous BMT for high-risk neuroblastoma; 3951=Phase I pilot study of multiple cycles of high dose chemotherapy with peripheral blood stem cell infusions in advanced stage neuroblastoma.; 4941=National Wilms tumor study V - therapeutic trial & biology study; 8605=Study of the combination of ifosfamide, mesna, and VP-16 in children and young adults with recurrent sarcomas, PNET and other tumors; 8742=Phase III portion of 8741 for neuroblastoma; 9047=Neuroblastoma biology protocol; 9082=Protocol for the development of intervention strategies to reduce the time between symptom onset and diagnosis of childhood cancer -a pediatric oncology group cancer control study; 9140=Therapy for patients with recurrent or refractory neuroblastoma - a phase II study; 9262=A Phase II study of taxol in children with recurrent/refractory soft-tissue sarcoma, rhabdomyosarcoma, osteosarcoma, Ewing's sarcoma, neuroblastoma, germ cell tumors, Wilms' tumor, hepatoblastoma, and hepatocellular carcinoma, a POG study; 9280=Neuroblastoma epidemiology protocol - A Non-Therapeutic Study - A Joint Project of: The University of North Carolina, The Pediatric Oncology Group and The Children's Cancer Study Group; 9340=Treatment of patients >365 days at diagnosis with stage IV NBL: Upfront Phase II Window - A Phase II Study; 9341=Treatment of patients >365 days at diagnosis with stage IV and stage IIB/III (N-myc) NBL - a phase III study; 9342=Neuroblastoma #5, bone marrow transplant - a phase III study; 9343=Interleukin-6 
in children receiving autologous bone marrow transplantation for advanced neuroblastoma - a pediatric oncology group phase I trial; 9361=Topotecan in pediatric patients with recurrent or progressive solid tumors - a pediatric oncology group phase II study; 9375=Topotecan plus cyclophosphamide in children with solid tumors - a pediatric oncology group phase I trial; 9464=Cyclophosphamide plus topotecan in children with recurrent or refractory solid tumors - a pediatric oncology group phase II study; 9640=Treatment of patients with high risk neuroblastoma (a feasibility pilot) using two cycles of marrow ablative chemotherapy followed by rescue With peripheral blood stem cells (PBSC), radiation therapy; A3973=A randomized study of purged vs. unpurged PBSC transplant following dose intensive induction therapy for high risk NBL; AADM01P1=Protocol for registration and consent to the childhood cancer research network: a limited institution pilot; AAML00P2=A dose finding study of the safety of gemtuzumab ozogamicin combined with conventional chemotherapy for patients with relapsed or refractory acute myeloid leukemia; ACCL0331=A Randomized double blind placebo controlled clinical trial to assess the efficacy of traumeelĀ® S (IND # 66649) for the prevention and treatment of mucositis in children undergoing hematopoietic stem cell transplantation; ACCRN07=Protocol for the enrollment on the official COG registry, The Childhood Cancer Research Network (CCRN); ADVL0018=Phase I study of hu14.18-IL2 fusion protein in patients with refractory neuroblastoma and other refractory GD2 expressing tumors; ADVL0212=A Phase I study of depsipeptide (NSC#630176, IND# 51810) in pediatric patients with refractory solid tumors and leukemias; ADVL0214=A phase I study of single agent OSI-774 (Tarceva) (NSC # 718781, IND #63383) followed by OSI-774 with temozolomide for patients with selected recurrent/refractory solid tumors, including brain tumors; ADVL0215=A phase I study of decitabine in combination with doxorubicin and cyclophosphamide in the treatment of relapsed or refractory solid tumors; ADVL0421=A phase II study of oxaliplatin in children with recurrent solid tumors; ADVL0524=Phase II trial of ixabepilone (BMS-247550), an epothilone B analog, in children and young adults with refractory solid tumors; ADVL0525=A phase II study of pemetrexed in children with recurrent malignancies; ADVL06B1=A pharmacokinetic-pharmacodynamic-pharmacogenetic study of actinomycin-D and vincristine in children with cancer; ADVL0714=A phase I study of VEGF trap (NSC# 724770, IND# 100137) in children with refractory solid tumors; ALTE03N1=Key adverse events after childhood cancer; ALTE05N1=Umbrella long-term follow-up protocol; ANBL0032=Phase III randomized study of chimeric antibody 14.18 (Ch14.18) in high risk neuroblastoma following myeloablative therapy and autologous stem cell rescue; ANBL00B1=Neuroblastoma biology studies; ANBL00P1=A pilot study of tandem high dose chemotherapy with stem cell rescue following induction therapy in children with high risk neuroblastoma; ANBL02P1=A pilot induction regimen incorporating dose-intensive topotecan and cyclophosphamide for treatment of newly diagnosed high risk neuroblastoma; ANBL0321=Phase II study of fenretinide in pediatric patients with resistant or recurrent neuroblastoma; ANBL0322=A phase II study of hu14.18-IL2 (BB-IND-9728) in children with recurrent or refractory neuroblastoma; ANBL0532=Phase III randomized trial of single vs. 
tandem myeloablative as consolidation therapy for high-risk neuroblastoma; ANBL0621=A phase II study of ABT-751, an orally bioavailable tubulin binding agent, in children with relapsed or refractory neuroblastoma; B003=Diagnostic & prognostic studies in NBL; B903=Childhood cancer genetics; B947=Protocol for collection of biology specimens for research studies; B954=Opsoclonus-myoclonus-ataxia syndrome, neuroblastoma and the presence of anti-neuronal antibodies; B973=Laboratory-clinical studies of neuroblastoma; E04=Self-administered epidemiology questionnaire; E18=A case-control study of risk factors for neuroblastoma; I03=Neuroblastoma, diagnostic/prognostic; N891=Parents' perceptions of randomization; P9462=Randomized treatment of recurrent neuroblastoma with topotecan regimens following desferrioxamine (POG only) in an investigational window; P9641=Primary surgical therapy for biologically defined low-risk neuroblastoma; P9761=A phase II trial of irinotecan in children with refractory solid tumors; P9963=A phase II trial of rebeccamycin analogue (NSC #655649) in children with solid tumors; R9702=Prognostic implications of MIBG uptake in patients with neuroblastoma previously treated on CCG-3891; S31=Right atrial catheter study; S921=Comparison of urokinase vs heparin in preventing Infection in central venous devices in children with malignancies]""" +# chunks = "" +# is_the_beginning = True +# max_chars = 2000 +# padding_words = 3 +# counter = 0 +# print(len(text)) +# # divvy up into chunks, sum of each chunk should equal the original text. +# for chunk in annotator.sliding_window(text=text, max_characters=max_chars, padding_words= padding_words): +# assert len(chunk) <= max_chars +# counter += 1 +# if is_the_beginning: +# chunks += chunk +# else: +# # remove redundand padded words from final result +# chunks += " ".join(chunk.split(" ")[padding_words:]) +# is_the_beginning = False + +# print(counter) +# # since spaces are trimmed by tokenizer , we can execuled all spaces and do char +# assert chunks == text \ No newline at end of file diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py index e55b6882..cd35ba30 100644 --- a/tests/unit/test_api.py +++ b/tests/unit/test_api.py @@ -6,24 +6,25 @@ import pytest pytest.skip("skipping as dug.api is no longer present", allow_module_level=True) from pytest import mark +import pytest_asyncio from dug.api import app, main, DugResource -@pytest.fixture +@pytest_asyncio.fixture def dug_api_test_client(): with app.test_client() as client: yield client -@pytest.fixture +@pytest_asyncio.fixture def mock_g_object(): with patch('dug.api.dug') as g: yield g -@pytest.fixture +@pytest_asyncio.fixture def mock_search_concepts(mock_g_object): mock_g_object().search_concepts.return_value = {'hits': {'hits': [ {'_type': '_doc', @@ -38,21 +39,21 @@ def mock_search_concepts(mock_g_object): }} -@pytest.fixture +@pytest_asyncio.fixture def mock_search_kg(mock_g_object): mock_g_object().search_kg.return_value = {'hits': {'hits': [ {'_type': '_doc', '_id': 'MEDDRA:10047249'} ]}} -@pytest.fixture +@pytest_asyncio.fixture def mock_search_variables(mock_g_object): mock_g_object().search_variables.return_value = {'hits': {'hits': [ {'_type': '_doc', '_id': 'MEDDRA:10047249'} ]}} -@pytest.fixture +@pytest_asyncio.fixture def mock_agg_data_types(mock_g_object): mock_g_object().agg_data_type.return_value = ["DBGaP"] diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 99f903dd..3a2d97eb 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py 
@@ -25,25 +25,31 @@ def test_dug_cli_parser(): @patch('dug.cli.crawl') def test_dug_cli_main_crawl(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() @mark.cli @patch('dug.cli.crawl') def test_dug_cli_main_extract_dug_elements(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag", "-x"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() assert mock_crawl.call_args_list[0].args[0].extract_dug_elements @mark.cli @patch('dug.cli.crawl') def test_dug_cli_main_extract_dug_elements_none(mock_crawl): main(["crawl", "somefile.csv", "--parser", "topmedtag"]) - assert mock_crawl.called_once() + mock_crawl.assert_called_once() assert not mock_crawl.call_args_list[0].args[0].extract_dug_elements +@mark.cli +@patch('dug.cli.crawl') +def test_dug_cli_main_annotator(mock_crawl): + main(["crawl", "somefile.csv","--parser", "topmedtag", "--annotator", "annotator-monarch"]) + mock_crawl.assert_called_once() + @mark.cli @patch('dug.cli.search') def test_dug_cli_main_search(mock_search): # mock_search.search.return_value = "Searching!" main(["search", "-q", "heart attack", "-t", "variables", "-k", "namespace=default"]) - assert mock_search.called_once() + mock_search.assert_called_once() diff --git a/tests/unit/test_core/test_search.py b/tests/unit/test_core/test_search.py index 5ec58468..db7ed75d 100644 --- a/tests/unit/test_core/test_search.py +++ b/tests/unit/test_core/test_search.py @@ -3,22 +3,25 @@ from unittest.mock import patch import pytest +import pytest_asyncio from dug.core.index import Index, SearchException from dug.config import Config -default_indices = ['concepts_index', 'variables_index', 'kg_index'] +default_indices = ["concepts_index", "variables_index", "kg_index"] -host = 'localhost' +host = "localhost" port = 9200 -username = 'elastic' -password = 'hunter2' -nboost_host = 'localhost' -hosts = [{'host': host, 'port': port, 'scheme': 'http'}] +username = "elastic" +password = "hunter2" +nboost_host = "localhost" +hosts = [{"host": host, "port": port, "scheme": "http"}] -class MockEsNode(): + +class MockEsNode: def info(): - return {"_nodes" : {"total": 1}} + return {"_nodes": {"total": 1}} + @dataclass class MockIndex: @@ -37,33 +40,34 @@ def get(self, id): def count(self, body): return len(self.values) - class MockIndices: - def __init__(self): self._indices = {} self.call_count = 0 + self.number_of_replicas = 1 def exists(self, index): return index in self._indices - def create( - self, - index, - body, - **_kwargs - ): + def create(self, index, body, **_kwargs): self.call_count += 1 self._indices[index] = MockIndex(**body) def get_index(self, index) -> MockIndex: return self._indices.get(index) + def get_settings(self, index): + index_schema = {"settings": {"index": {"number_of_replicas": self.number_of_replicas}}} + settings = { + "kg_index": index_schema, + "concepts_index": index_schema, + "variables_index": index_schema, + } + return settings class MockElastic: - def __init__(self, indices: MockIndices): self.indices = indices self._up = True @@ -85,36 +89,28 @@ def disconnect(self): self._up = False def count(self, body, index): - return { - 'count': self.indices.get_index(index).count(body) - } + return {"count": self.indices.get_index(index).count(body)} def search(self, index, body, **kwargs): values = self.indices.get_index(index).values - return { - 'results': { - k: v - for k, v in values.items() - if body in v - } - } - - + return {"results": {k: v for k, v 
in values.items() if body in v}} -@pytest.fixture +@pytest_asyncio.fixture def elastic(): - with patch('dug.core.index.Elasticsearch') as es_class: + with patch("dug.core.index.Elasticsearch") as es_class: es_instance = MockElastic(indices=MockIndices()) es_class.return_value = es_instance yield es_instance def test_init(elastic): - cfg = Config(elastic_host='localhost', - elastic_username='elastic', - elastic_password='hunter2', - nboost_host='localhost') + cfg = Config( + elastic_host="localhost", + elastic_username="elastic", + elastic_password="hunter2", + nboost_host="localhost", + ) search = Index(cfg) @@ -128,6 +124,7 @@ def test_init_no_ping(elastic): with pytest.raises(SearchException): _search = Index(Config.from_env()) + @pytest.mark.asyncio async def test_init_indices(elastic): search = Index(Config.from_env()) @@ -141,16 +138,17 @@ async def test_init_indices(elastic): def test_index_doc(elastic: MockElastic): search = Index(Config.from_env()) - assert len(elastic.indices.get_index('concepts_index').values) == 0 - search.index_doc('concepts_index', {'name': 'sample'}, "ID:1") - assert len(elastic.indices.get_index('concepts_index').values) == 1 - assert elastic.indices.get_index('concepts_index').get("ID:1") == {'name': 'sample'} + assert len(elastic.indices.get_index("concepts_index").values) == 0 + search.index_doc("concepts_index", {"name": "sample"}, "ID:1") + assert len(elastic.indices.get_index("concepts_index").values) == 1 + assert elastic.indices.get_index("concepts_index").get("ID:1") == {"name": "sample"} def test_update_doc(elastic: MockElastic): search = Index(Config.from_env()) - search.index_doc('concepts_index', {'name': 'sample'}, "ID:1") - search.update_doc('concepts_index', {'name': 'new value!'}, "ID:1") - assert elastic.indices.get_index('concepts_index').get("ID:1") == {'name': 'new value!'} - + search.index_doc("concepts_index", {"name": "sample"}, "ID:1") + search.update_doc("concepts_index", {"name": "new value!"}, "ID:1") + assert elastic.indices.get_index("concepts_index").get("ID:1") == { + "name": "new value!" 
+ } diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 1907bf30..f8e15694 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -31,7 +31,7 @@ def test_annotate_element(crawler): "collection-desc" ) crawler.annotate_element(element) - AnnotatorMock.annotate.assert_called_with(**{ + AnnotatorMock.assert_called_with(**{ "text": element.ml_ready_desc, "http_session": HTTPSessionMock }) diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index c37df40e..491bfe9f 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -1,12 +1,13 @@ -from dug.core.annotate import Identifier from dug.core.parsers._base import DugElement, DugConcept +from dug.core.annotators import DugIdentifier, AnnotateMonarch +# from dug.core.annotators.monarch_annotator import AnnotateMonarch def test_dug_concept(): concept = DugConcept("concept-1", 'Concept-1', 'The first concept', 'secondary') - ident_1 = Identifier("ident-1", "Identifier-1") - ident_2 = Identifier("ident-2", "Identifier-2") + ident_1 = DugIdentifier("ident-1", "Identifier-1") + ident_2 = DugIdentifier("ident-2", "Identifier-2") concept.add_identifier(ident_1) concept.add_identifier(ident_2) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index fd841a8a..df6f9e98 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,33 +1,33 @@ -import pytest +# import pytest -from dug.utils import get_nida_study_link -import requests +# from dug.utils import get_nida_study_link +# import requests -@pytest.mark.skip("Implement this test") -def test_object_factory(): - pass +# @pytest.mark.skip("Implement this test") +# def test_object_factory(): +# pass -@pytest.mark.skip("Implement this test") -def test_complex_handler(): - pass +# @pytest.mark.skip("Implement this test") +# def test_complex_handler(): +# pass -@pytest.mark.skip("Implement this test") -def test_get_dbgap_var_link(): - pass +# @pytest.mark.skip("Implement this test") +# def test_get_dbgap_var_link(): +# pass -@pytest.mark.skip("Implement this test") -def test_get_dbgap_study_link(): - pass +# @pytest.mark.skip("Implement this test") +# def test_get_dbgap_study_link(): +# pass -def test_get_nida_study_link(): - study_id = "NIDA-CPU-0008" - link = get_nida_study_link(study_id=study_id) - response = requests.post( - url=link - ) - content = str(response.text) - assert content.count(study_id) > 0 +# def test_get_nida_study_link(): +# study_id = "NIDA-CPU-0008" +# link = get_nida_study_link(study_id=study_id) +# response = requests.post( +# url=link +# ) +# content = str(response.text) +# assert content.count(study_id) > 0