From 6cc28ad36a7a72513bf2433b820c1d62a25e9557 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Thu, 6 Jun 2024 11:20:22 +0200 Subject: [PATCH 1/6] add github ci for eval --- .github/workflows/ci-eval-pr.yml | 94 ++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .github/workflows/ci-eval-pr.yml diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml new file mode 100644 index 000000000..34055ce86 --- /dev/null +++ b/.github/workflows/ci-eval-pr.yml @@ -0,0 +1,94 @@ +name: CI + +on: + pull_request: + branches: + - main + paths: + - trulens_eval/**/* + +jobs: + PRBranchProtect: + runs-on: ubuntu-latest + strategy: + matrix: + include: + - python-version: 3.12 + tests-folder: tests/unit + - python-version: 3.9 + tests-folder: tests/unit/static + - python-version: 3.10 + tests-folder: tests/unit/static + - python-version: 3.11 + tests-folder: tests/unit/static + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + clean: true + + - name: Setup conda + run: | + echo "##vso[task.prependpath]$CONDA/bin" + set -e + conda create -y --quiet -n ${{ matrix.python-version }} python=${{ matrix.python-version }} + conda init bash + + - name: Install and run formatters + if: ${{ matrix.python-version }} == '3.11' + run: | + source activate ${{ matrix.python-version }} + pip install yapf==0.32.0 isort==5.10.1 --verbose + ./format.sh --eval + num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l` + if [ $num_changed_files -ne 0 ]; then + echo "The following files have changed after running format.sh. Please format your code and update the PR." + git ls-files --others -m --exclude-standard ./trulens_eval + git diff + fi + shell: bash + + - name: Install trulens + run: | + source activate ${{ matrix.python-version }} + cd ./trulens_eval + pip install -e . 
--verbose + shell: bash + + - name: Install testing packages + run: | + source activate ${{ matrix.python-version }} + pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose + shell: bash + + - name: Describe python env + run: | + source activate ${{ matrix.python-version }} + python --version + pip --version + pip list --verbose + shell: bash + + - name: Unit tests with required packages + run: | + source activate ${{ matrix.python-version }} + cd ./trulens_eval + python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} + shell: bash + + # - name: Install optional packages and run tests + # run: | + # source activate ${{ matrix.python-version }} + # cd ./trulens_eval + # pip install --verbose -r trulens_eval/requirements.optional.txt + # python -m pytest --test-run-title="Optional ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} + # shell: bash + # env: + # TEST_OPTIONAL: true + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }} + # PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + # PINECONE_ENV: ${{ secrets.PINECONE_ENV }} + # HUGGINGFACEHUB_API_TOKEN: ${{ secrets.HUGGINGFACEHUB_API_TOKEN }} From bbd9b2a56af4a86927fabf09eb9a6e43c85972d5 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Thu, 6 Jun 2024 11:34:04 +0200 Subject: [PATCH 2/6] fix ci --- .github/workflows/ci-eval-pr.yml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml index 34055ce86..089a893f1 100644 --- a/.github/workflows/ci-eval-pr.yml +++ b/.github/workflows/ci-eval-pr.yml @@ -29,17 +29,18 @@ jobs: fetch-depth: 0 clean: true - - name: Setup conda - run: | - echo "##vso[task.prependpath]$CONDA/bin" - set -e - conda create -y --quiet -n ${{ matrix.python-version }} python=${{ matrix.python-version }} - conda init bash + - name: Setup Conda + 
uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + activate-environment: myenv + use-only-tar-bz2: true - name: Install and run formatters if: ${{ matrix.python-version }} == '3.11' run: | - source activate ${{ matrix.python-version }} + conda activate myenv pip install yapf==0.32.0 isort==5.10.1 --verbose ./format.sh --eval num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l` @@ -52,20 +53,20 @@ jobs: - name: Install trulens run: | - source activate ${{ matrix.python-version }} + conda activate myenv cd ./trulens_eval pip install -e . --verbose shell: bash - name: Install testing packages run: | - source activate ${{ matrix.python-version }} + conda activate myenv pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose shell: bash - name: Describe python env run: | - source activate ${{ matrix.python-version }} + conda activate myenv python --version pip --version pip list --verbose @@ -73,7 +74,7 @@ jobs: - name: Unit tests with required packages run: | - source activate ${{ matrix.python-version }} + conda activate myenv cd ./trulens_eval python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} shell: bash From a35e4bd2704387c3480486d82bb7d65c0c12a840 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Thu, 6 Jun 2024 11:41:53 +0200 Subject: [PATCH 3/6] fix ci attempt 2 --- .github/workflows/ci-eval-pr.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml index 089a893f1..a3d553a3d 100644 --- a/.github/workflows/ci-eval-pr.yml +++ b/.github/workflows/ci-eval-pr.yml @@ -6,6 +6,7 @@ on: - main paths: - trulens_eval/**/* + - .github/workflows/ci-eval-pr.yml jobs: PRBranchProtect: @@ -30,12 +31,10 @@ jobs: clean: true - name: Setup Conda - uses: conda-incubator/setup-miniconda@v2 + uses: 
conda-incubator/setup-miniconda@v3 with: - auto-update-conda: true python-version: ${{ matrix.python-version }} activate-environment: myenv - use-only-tar-bz2: true - name: Install and run formatters if: ${{ matrix.python-version }} == '3.11' @@ -49,20 +48,20 @@ jobs: git ls-files --others -m --exclude-standard ./trulens_eval git diff fi - shell: bash + shell: bash -el {0} - name: Install trulens run: | conda activate myenv cd ./trulens_eval pip install -e . --verbose - shell: bash + shell: bash -el {0} - name: Install testing packages run: | conda activate myenv pip install pytest==7.0.1 pytest-subtests pytest-azurepipelines --verbose - shell: bash + shell: bash -el {0} - name: Describe python env run: | @@ -70,22 +69,22 @@ jobs: python --version pip --version pip list --verbose - shell: bash + shell: bash -el {0} - name: Unit tests with required packages run: | conda activate myenv cd ./trulens_eval python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} - shell: bash + shell: bash -el {0} # - name: Install optional packages and run tests # run: | - # source activate ${{ matrix.python-version }} + # conda activate myenv # cd ./trulens_eval # pip install --verbose -r trulens_eval/requirements.optional.txt # python -m pytest --test-run-title="Optional ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} - # shell: bash + # shell: bash -el {0} # env: # TEST_OPTIONAL: true # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} From 5be402da06481db002ce1127406b9176419191af Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Thu, 6 Jun 2024 11:44:45 +0200 Subject: [PATCH 4/6] disable format check --- .github/workflows/ci-eval-pr.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml index a3d553a3d..641a8f3c9 100644 --- a/.github/workflows/ci-eval-pr.yml +++ b/.github/workflows/ci-eval-pr.yml @@ 
-18,7 +18,7 @@ jobs: tests-folder: tests/unit - python-version: 3.9 tests-folder: tests/unit/static - - python-version: 3.10 + - python-version: "3.10" tests-folder: tests/unit/static - python-version: 3.11 tests-folder: tests/unit/static @@ -36,19 +36,19 @@ jobs: python-version: ${{ matrix.python-version }} activate-environment: myenv - - name: Install and run formatters - if: ${{ matrix.python-version }} == '3.11' - run: | - conda activate myenv - pip install yapf==0.32.0 isort==5.10.1 --verbose - ./format.sh --eval - num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l` - if [ $num_changed_files -ne 0 ]; then - echo "The following files have changed after running format.sh. Please format your code and update the PR." - git ls-files --others -m --exclude-standard ./trulens_eval - git diff - fi - shell: bash -el {0} + # - name: Install and run formatters + # if: ${{ matrix.python-version }} == '3.11' + # run: | + # conda activate myenv + # pip install yapf==0.32.0 isort==5.10.1 --verbose + # ./format.sh --eval + # num_changed_files=`git ls-files --others -m --exclude-standard ./trulens_eval | wc -l` + # if [ $num_changed_files -ne 0 ]; then + # echo "The following files have changed after running format.sh. Please format your code and update the PR." 
+ # git ls-files --others -m --exclude-standard ./trulens_eval + # git diff + # fi + # shell: bash -el {0} - name: Install trulens run: | From 873112d30b9adc404c8603cdb004069063744d28 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Thu, 6 Jun 2024 12:05:47 +0200 Subject: [PATCH 5/6] add secrets --- .github/workflows/ci-eval-pr.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-eval-pr.yml b/.github/workflows/ci-eval-pr.yml index 641a8f3c9..7b908f996 100644 --- a/.github/workflows/ci-eval-pr.yml +++ b/.github/workflows/ci-eval-pr.yml @@ -14,13 +14,13 @@ jobs: strategy: matrix: include: - - python-version: 3.12 + - python-version: "3.12" tests-folder: tests/unit - - python-version: 3.9 + - python-version: "3.9" tests-folder: tests/unit/static - python-version: "3.10" tests-folder: tests/unit/static - - python-version: 3.11 + - python-version: "3.11" tests-folder: tests/unit/static timeout-minutes: 30 @@ -77,6 +77,11 @@ jobs: cd ./trulens_eval python -m pytest --test-run-title="Required ${{ matrix.python-version }} unit tests" ${{ matrix.tests-folder }} shell: bash -el {0} + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }} + PINECONE_API_KEY: ${{ secrets.PINECONE_API_KEY }} + PINECONE_ENV: "us-west1-gcp-free" # - name: Install optional packages and run tests # run: | From 836f20de653e63750d549d3dbcbd4c28700eebb9 Mon Sep 17 00:00:00 2001 From: Eric Pinzur Date: Mon, 3 Jun 2024 19:00:06 +0200 Subject: [PATCH 6/6] added option to make deferred evaluation quieter --- trulens_eval/trulens_eval/feedback/provider/base.py | 6 ++---- trulens_eval/trulens_eval/feedback/provider/hugs.py | 2 +- trulens_eval/trulens_eval/tru.py | 9 ++++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py index 2ca01fe73..0fa63a25a 100644 --- 
a/trulens_eval/trulens_eval/feedback/provider/base.py +++ b/trulens_eval/trulens_eval/feedback/provider/base.py @@ -5,7 +5,6 @@ import nltk from nltk.tokenize import sent_tokenize import numpy as np -from tqdm.auto import tqdm from trulens_eval.feedback import prompts from trulens_eval.feedback.provider.endpoint import base as mod_endpoint @@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons( Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. """ - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" hypotheses = sent_tokenize(statement) system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM - for i, hypothesis in enumerate(tqdm( - hypotheses, desc="Groundedness per statement in source")): + for i, hypothesis in enumerate(hypotheses): user_prompt = prompts.LLM_GROUNDEDNESS_USER.format( premise=f"{source}", hypothesis=f"{hypothesis}" ) diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py index 6cd4835c7..a3c7594d2 100644 --- a/trulens_eval/trulens_eval/feedback/provider/hugs.py +++ b/trulens_eval/trulens_eval/feedback/provider/hugs.py @@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str, Returns: Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation. """ - nltk.download('punkt') + nltk.download('punkt', quiet=True) groundedness_scores = {} reasons_str = "" diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py index 85edac997..de6da1e07 100644 --- a/trulens_eval/trulens_eval/tru.py +++ b/trulens_eval/trulens_eval/tru.py @@ -749,6 +749,8 @@ def start_evaluator(self, fork: If set, will start the evaluator in a new process instead of a thread. NOT CURRENTLY SUPPORTED. 
+ disable_tqdm: If set, will disable progress bar logging from the evaluator. + Returns: The started process or thread that is executing the deferred feedback evaluator. @@ -816,14 +818,15 @@ def runloop(): total=queue_total, postfix={ status.name: count for status, count in queue_stats.items() - } + }, + disable=disable_tqdm ) # Show the status of the results so far. - tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs") + tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm) # Show what is being waited for right now. - tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs") + tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm) runs_stats = defaultdict(int)