From 6cc28ad36a7a72513bf2433b820c1d62a25e9557 Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Thu, 6 Jun 2024 11:20:22 +0200
Subject: [PATCH 1/6] add github ci for eval

---
 .github/workflows/ci-eval-pr.yml | 94 ++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 .github/workflows/ci-eval-pr.yml

diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml
new file mode 100644
index 000000000..34055ce86
--- /dev/null
+++ b/.github/workflows/ci-eval-pr.yml
@@ -0,0 +1,94 @@
+name: CI
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - trulens_eval/**/*
+
+jobs:
+  PRBranchProtect:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - python-version: 3.12
+            tests-folder: tests/unit
+          - python-version: 3.9
+            tests-folder: tests/unit/static
+          - python-version: 3.10
+            tests-folder: tests/unit/static
+          - python-version: 3.11
+            tests-folder: tests/unit/static
+    timeout-minutes: 30
+
+    steps:
+      - uses: actions/checkout@v4  # v2 targets the retired Node 12 runtime
+        with:
+          fetch-depth: 0  # full history rather than a shallow clone
+          clean: true
+
+      - name: Setup conda
+        run: |
+          echo "##vso[task.prependpath]$CONDA/bin"
+          set -e
+          conda create -y --quiet -n ${{ matrix.python-version }} python=${{ matrix.python-version }}
+          conda init bash
+
+      - name: Install and run formatters
+        if: ${{ matrix.python-version }} == '3.11'
+        run: |
+          source activate ${{ matrix.python-version }}
+          pip install yapf==0.32.0 isort==5.10.1 --verbose
+          ./format.sh --eval
+          num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l`
+          if [ $num_changed_files -ne 0 ]; then
+            echo "The following files have changed after running format.sh. Please format your code and update the PR."
+            git ls-files --others -m --exclude-standard ./trulens_eval
+            git diff
+          fi
+        shell: bash
+
+      - name: Install trulens
+        run: |
+          source activate ${{ matrix.python-version }}
+          cd ./trulens_eval
+          pip install -e . --verbose
+        shell: bash
+
+      - name: Install testing packages
+        run: |
+          source activate ${{ matrix.python-version }}
+          pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose
+        shell: bash
+
+      - name: Describe python env
+        run: |
+          source activate ${{ matrix.python-version }}
+          python --version
+          pip --version
+          pip list --verbose
+        shell: bash
+
+      - name: Unit tests with required packages
+        run: |
+          source activate ${{ matrix.python-version }}
+          cd ./trulens_eval
+          python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
+        shell: bash
+
+    #   - name: Install optional packages and run tests
+    #     run: |
+    #       source activate ${{ matrix.python-version }}
+    #       cd ./trulens_eval
+    #       pip install --verbose -r trulens_eval/requirements.optional.txt
+    #       python -m pytest --test-run-title="Optional ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
+    #     shell: bash
+    #     env:
+    #       TEST_OPTIONAL: true
+    #       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+    #       HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
+    #       PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+    #       PINECONE_ENV: ${{ secrets.PINECONE_ENV }}
+    #       HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }}

From bbd9b2a56af4a86927fabf09eb9a6e43c85972d5 Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Thu, 6 Jun 2024 11:34:04 +0200
Subject: [PATCH 2/6] fix ci

---
 .github/workflows/ci-eval-pr.yml | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml
index 34055ce86..089a893f1 100644
--- a/.github/workflows/ci-eval-pr.yml
+++ b/.github/workflows/ci-eval-pr.yml
@@ -29,17 +29,18 @@ jobs:
           fetch-depth: 0
           clean: true
 
-      - name: Setup conda
-        run: |
-          echo "##vso[task.prependpath]$CONDA/bin"
-          set -e
-          conda create -y --quiet -n ${{ matrix.python-version }} python=${{ matrix.python-version }}
-          conda init bash
+      - name: Setup Conda
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-update-conda: true
+          python-version: ${{ matrix.python-version }}
+          activate-environment: myenv
+          use-only-tar-bz2: true
 
       - name: Install and run formatters
         if: ${{ matrix.python-version }} == '3.11'
         run: |
-          source activate ${{ matrix.python-version }}
+          conda activate myenv
           pip install yapf==0.32.0 isort==5.10.1 --verbose
           ./format.sh --eval
           num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l`
@@ -52,20 +53,20 @@ jobs:
 
       - name: Install trulens
         run: |
-          source activate ${{ matrix.python-version }}
+          conda activate myenv
           cd ./trulens_eval
           pip install -e . --verbose
         shell: bash
 
       - name: Install testing packages
         run: |
-          source activate ${{ matrix.python-version }}
+          conda activate myenv
           pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose
         shell: bash
 
       - name: Describe python env
         run: |
-          source activate ${{ matrix.python-version }}
+          conda activate myenv
           python --version
           pip --version
           pip list --verbose
@@ -73,7 +74,7 @@ jobs:
 
       - name: Unit tests with required packages
         run: |
-          source activate ${{ matrix.python-version }}
+          conda activate myenv
           cd ./trulens_eval
           python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
         shell: bash

From a35e4bd2704387c3480486d82bb7d65c0c12a840 Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Thu, 6 Jun 2024 11:41:53 +0200
Subject: [PATCH 3/6] fix ci attempt 2

---
 .github/workflows/ci-eval-pr.yml | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml
index 089a893f1..a3d553a3d 100644
--- a/.github/workflows/ci-eval-pr.yml
+++ b/.github/workflows/ci-eval-pr.yml
@@ -6,6 +6,7 @@ on:
       - main
     paths:
       - trulens_eval/**/*
+      - .github/workflows/ci-eval-pr.yml
 
 jobs:
   PRBranchProtect:
@@ -30,12 +31,10 @@ jobs:
           clean: true
 
       - name: Setup Conda
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v3
         with:
-          auto-update-conda: true
           python-version: ${{ matrix.python-version }}
           activate-environment: myenv
-          use-only-tar-bz2: true
 
       - name: Install and run formatters
         if: ${{ matrix.python-version }} == '3.11'
@@ -49,20 +48,20 @@ jobs:
             git ls-files --others -m --exclude-standard ./trulens_eval
             git diff
           fi
-        shell: bash
+        shell: bash -el {0}
 
       - name: Install trulens
         run: |
           conda activate myenv
           cd ./trulens_eval
           pip install -e . --verbose
-        shell: bash
+        shell: bash -el {0}
 
       - name: Install testing packages
         run: |
           conda activate myenv
           pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose
-        shell: bash
+        shell: bash -el {0}
 
       - name: Describe python env
         run: |
@@ -70,22 +69,22 @@ jobs:
           python --version
           pip --version
           pip list --verbose
-        shell: bash
+        shell: bash -el {0}
 
       - name: Unit tests with required packages
         run: |
           conda activate myenv
           cd ./trulens_eval
           python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
-        shell: bash
+        shell: bash -el {0}
 
     #   - name: Install optional packages and run tests
     #     run: |
-    #       source activate ${{ matrix.python-version }}
+    #       conda activate myenv
     #       cd ./trulens_eval
     #       pip install --verbose -r trulens_eval/requirements.optional.txt
     #       python -m pytest --test-run-title="Optional ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
-    #     shell: bash
+    #     shell: bash -el {0}
     #     env:
     #       TEST_OPTIONAL: true
     #       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

From 5be402da06481db002ce1127406b9176419191af Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Thu, 6 Jun 2024 11:44:45 +0200
Subject: [PATCH 4/6] disable format check

---
 .github/workflows/ci-eval-pr.yml | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml
index a3d553a3d..641a8f3c9 100644
--- a/.github/workflows/ci-eval-pr.yml
+++ b/.github/workflows/ci-eval-pr.yml
@@ -18,7 +18,7 @@ jobs:
             tests-folder: tests/unit
           - python-version: 3.9
             tests-folder: tests/unit/static
-          - python-version: 3.10
+          - python-version: "3.10"
             tests-folder: tests/unit/static
           - python-version: 3.11
             tests-folder: tests/unit/static
@@ -36,19 +36,19 @@ jobs:
           python-version: ${{ matrix.python-version }}
           activate-environment: myenv
 
-      - name: Install and run formatters
-        if: ${{ matrix.python-version }} == '3.11'
-        run: |
-          conda activate myenv
-          pip install yapf==0.32.0 isort==5.10.1 --verbose
-          ./format.sh --eval
-          num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l`
-          if [ $num_changed_files -ne 0 ]; then
-            echo "The following files have changed after running format.sh. Please format your code and update the PR."
-            git ls-files --others -m --exclude-standard ./trulens_eval
-            git diff
-          fi
-        shell: bash -el {0}
+      # - name: Install and run formatters
+      #   if: ${{ matrix.python-version == '3.11' }}  # comparison must sit inside the expression, else the condition is always true
+      #   run: |
+      #     conda activate myenv
+      #     pip install yapf==0.32.0 isort==5.10.1 --verbose
+      #     ./format.sh --eval
+      #     num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l`
+      #     if [ $num_changed_files -ne 0 ]; then
+      #       echo "The following files have changed after running format.sh. Please format your code and update the PR."
+      #       git ls-files --others -m --exclude-standard ./trulens_eval
+      #       git diff
+      #     fi
+      #   shell: bash -el {0}
 
       - name: Install trulens
         run: |

From 873112d30b9adc404c8603cdb004069063744d28 Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Thu, 6 Jun 2024 12:05:47 +0200
Subject: [PATCH 5/6] add secrets

---
 .github/workflows/ci-eval-pr.yml | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml
index 641a8f3c9..7b908f996 100644
--- a/.github/workflows/ci-eval-pr.yml
+++ b/.github/workflows/ci-eval-pr.yml
@@ -14,13 +14,13 @@ jobs:
     strategy:
       matrix:
         include:
-          - python-version: 3.12
+          - python-version: "3.12"
             tests-folder: tests/unit
-          - python-version: 3.9
+          - python-version: "3.9"
             tests-folder: tests/unit/static
           - python-version: "3.10"
             tests-folder: tests/unit/static
-          - python-version: 3.11
+          - python-version: "3.11"
             tests-folder: tests/unit/static
     timeout-minutes: 30
 
@@ -77,6 +77,11 @@ jobs:
           cd ./trulens_eval
           python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }}
         shell: bash -el {0}
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
+          PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }}
+          PINECONE_ENV: "us-west1-gcp-free"
 
     #   - name: Install optional packages and run tests
     #     run: |

From 836f20de653e63750d549d3dbcbd4c28700eebb9 Mon Sep 17 00:00:00 2001
From: Eric Pinzur <epinzur@gmail.com>
Date: Mon, 3 Jun 2024 19:00:06 +0200
Subject: [PATCH 6/6] added option to make deferred evaluation quieter

---
 trulens_eval/trulens_eval/feedback/provider/base.py | 6 ++----
 trulens_eval/trulens_eval/feedback/provider/hugs.py | 2 +-
 trulens_eval/trulens_eval/tru.py                    | 9 ++++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py
index 2ca01fe73..0fa63a25a 100644
--- a/trulens_eval/trulens_eval/feedback/provider/base.py
+++ b/trulens_eval/trulens_eval/feedback/provider/base.py
@@ -5,7 +5,6 @@
 import nltk
 from nltk.tokenize import sent_tokenize
 import numpy as np
-from tqdm.auto import tqdm
 
 from trulens_eval.feedback import prompts
 from trulens_eval.feedback.provider.endpoint import base as mod_endpoint
@@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons(
         Returns:
             Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
         """
-        nltk.download('punkt')
+        nltk.download('punkt', quiet=True)
         groundedness_scores = {}
         reasons_str = ""
 
         hypotheses = sent_tokenize(statement)
         system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM
-        for i, hypothesis in enumerate(tqdm(
-                hypotheses, desc="Groundedness per statement in source")):
+        for i, hypothesis in enumerate(hypotheses):
             user_prompt = prompts.LLM_GROUNDEDNESS_USER.format(
                 premise=f"{source}", hypothesis=f"{hypothesis}"
             )
diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py
index 6cd4835c7..a3c7594d2 100644
--- a/trulens_eval/trulens_eval/feedback/provider/hugs.py
+++ b/trulens_eval/trulens_eval/feedback/provider/hugs.py
@@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str,
         Returns:
             Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
         """
-        nltk.download('punkt')
+        nltk.download('punkt', quiet=True)
         groundedness_scores = {}
 
         reasons_str = ""
diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py
index 85edac997..de6da1e07 100644
--- a/trulens_eval/trulens_eval/tru.py
+++ b/trulens_eval/trulens_eval/tru.py
@@ -749,6 +749,8 @@ def start_evaluator(self,
             fork: If set, will start the evaluator in a new process instead of a
                 thread. NOT CURRENTLY SUPPORTED.
 
+            disable_tqdm: If set, will disable progress bar logging from the evaluator.
+
         Returns:
             The started process or thread that is executing the deferred feedback
                 evaluator.
@@ -816,14 +818,15 @@ def runloop():
                 total=queue_total,
                 postfix={
                     status.name: count for status, count in queue_stats.items()
-                }
+                },
+                disable=disable_tqdm
             )
 
             # Show the status of the results so far.
-            tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs")
+            tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm)
 
             # Show what is being waited for right now.
-            tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs")
+            tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm)
 
             runs_stats = defaultdict(int)