added option to make deferred evaluation quieter

truera · Jun 6, 2024 · 836f20d · 836f20d
1 parent 1512177
commit 836f20d
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 8 deletions.
diff --git a/trulens_eval/trulens_eval/feedback/provider/base.py b/trulens_eval/trulens_eval/feedback/provider/base.py
@@ -5,7 +5,6 @@
 import nltk
 from nltk.tokenize import sent_tokenize
 import numpy as np
-from tqdm.auto import tqdm
 
 from trulens_eval.feedback import prompts
 from trulens_eval.feedback.provider.endpoint import base as mod_endpoint
@@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons(
         Returns:
             Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
         """
-        nltk.download('punkt')
+        nltk.download('punkt', quiet=True)
         groundedness_scores = {}
         reasons_str = ""
 
         hypotheses = sent_tokenize(statement)
         system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM
-        for i, hypothesis in enumerate(tqdm(
-                hypotheses, desc="Groundedness per statement in source")):
+        for i, hypothesis in enumerate(hypotheses):
             user_prompt = prompts.LLM_GROUNDEDNESS_USER.format(
                 premise=f"{source}", hypothesis=f"{hypothesis}"
             )

diff --git a/trulens_eval/trulens_eval/feedback/provider/hugs.py b/trulens_eval/trulens_eval/feedback/provider/hugs.py
@@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str,
         Returns:
             Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
         """
-        nltk.download('punkt')
+        nltk.download('punkt', quiet=True)
         groundedness_scores = {}
 
         reasons_str = ""

diff --git a/trulens_eval/trulens_eval/tru.py b/trulens_eval/trulens_eval/tru.py
@@ -749,6 +749,8 @@ def start_evaluator(self,
             fork: If set, will start the evaluator in a new process instead of a
                 thread. NOT CURRENTLY SUPPORTED.
 
+            disable_tqdm: If set, will disable progress bar logging from the evaluator.
+
         Returns:
             The started process or thread that is executing the deferred feedback
                 evaluator.
@@ -816,14 +818,15 @@ def runloop():
                 total=queue_total,
                 postfix={
                     status.name: count for status, count in queue_stats.items()
-                }
+                },
+                disable=disable_tqdm
             )
 
             # Show the status of the results so far.
-            tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs")
+            tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm)
 
             # Show what is being waited for right now.
-            tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs")
+            tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm)
 
             runs_stats = defaultdict(int)