Skip to content

Commit

Permalink
added option to make deferred evaluation quieter
Browse files Browse the repository at this point in the history
  • Loading branch information
epinzur committed Jun 6, 2024
1 parent 1512177 commit 836f20d
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 8 deletions.
6 changes: 2 additions & 4 deletions trulens_eval/trulens_eval/feedback/provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import nltk
from nltk.tokenize import sent_tokenize
import numpy as np
from tqdm.auto import tqdm

from trulens_eval.feedback import prompts
from trulens_eval.feedback.provider.endpoint import base as mod_endpoint
Expand Down Expand Up @@ -1198,14 +1197,13 @@ def groundedness_measure_with_cot_reasons(
Returns:
Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
"""
nltk.download('punkt')
nltk.download('punkt', quiet=True)
groundedness_scores = {}
reasons_str = ""

hypotheses = sent_tokenize(statement)
system_prompt = prompts.LLM_GROUNDEDNESS_SYSTEM
for i, hypothesis in enumerate(tqdm(
hypotheses, desc="Groundedness per statement in source")):
for i, hypothesis in enumerate(hypotheses):
user_prompt = prompts.LLM_GROUNDEDNESS_USER.format(
premise=f"{source}", hypothesis=f"{hypothesis}"
)
Expand Down
2 changes: 1 addition & 1 deletion trulens_eval/trulens_eval/feedback/provider/hugs.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def groundedness_measure_with_nli(self, source: str,
Returns:
Tuple[float, str]: A tuple containing a value between 0.0 (not grounded) and 1.0 (grounded) and a string containing the reasons for the evaluation.
"""
nltk.download('punkt')
nltk.download('punkt', quiet=True)
groundedness_scores = {}

reasons_str = ""
Expand Down
9 changes: 6 additions & 3 deletions trulens_eval/trulens_eval/tru.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,8 @@ def start_evaluator(self,
fork: If set, will start the evaluator in a new process instead of a
thread. NOT CURRENTLY SUPPORTED.
disable_tqdm: If set, will disable progress bar logging from the evaluator.
Returns:
The started process or thread that is executing the deferred feedback
evaluator.
Expand Down Expand Up @@ -816,14 +818,15 @@ def runloop():
total=queue_total,
postfix={
status.name: count for status, count in queue_stats.items()
}
},
disable=disable_tqdm
)

# Show the status of the results so far.
tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs")
tqdm_total = tqdm(desc="Done Runs", initial=0, unit="runs", disable=disable_tqdm)

# Show what is being waited for right now.
tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs")
tqdm_waiting = tqdm(desc="Waiting for Runs", initial=0, unit="runs", disable=disable_tqdm)

runs_stats = defaultdict(int)

Expand Down

0 comments on commit 836f20d

Please sign in to comment.