Moved deep_eval to input for combine outputs
dividor committed Mar 28, 2024
1 parent bdad1fe commit e141d0c
Showing 3 changed files with 25 additions and 24 deletions.
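
In short: the deep_eval node previously ran downstream of process_output, taking the whole processed dict as its single input. This commit inverts that dependency — deep_eval now reads rweb_results, user_question, and actual_output directly from the upstream nodes, and process_output takes the resulting score and reason as two new inputs, so they land in the flow's combined output.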
25 changes: 10 additions & 15 deletions flows/reliefweb_chat/deep_eval.py
@@ -80,13 +80,19 @@ def get_model_name(self):
 # Deep eval, see https://github.com/confident-ai/deepeval
 @tool
 def test_case(
-    processed_output: dict, conn: AzureOpenAIConnection, deployment_name: str
-):
+    rweb_results: str,
+    user_question: str,
+    actual_output: str,
+    conn: AzureOpenAIConnection,
+    deployment_name: str,
+) -> dict:
     """
     An example function for evaluating a question using the deepeval library.
 
     Args:
-        processed_output (dict): The processed output containing the necessary data for evaluation.
+        rweb_results (str): The results from the ReliefWeb API.
+        user_question (str): The user question to evaluate.
+        actual_output (str): The actual output to evaluate against.
         conn (AzureOpenAIConnection): The AzureOpenAIConnection object for connecting to Azure services.
         deployment_name (str): The name of the deployment.
@@ -95,13 +101,6 @@ def test_case(
     """
     conn_dict = dict(conn)
 
-    rweb_results = processed_output["rweb_results"]
-    input = ""
-    for r in rweb_results:
-        input += r["title"] + " " + str(r["body"])
-
-    actual_output = processed_output["llm_summary_result_processed"]
-
     # Set up LLM connection
     custom_model = AzureChatOpenAI(
         openai_api_version=conn_dict["api_version"],
@@ -111,10 +110,6 @@ def test_case(
     )
     model = AzureOpenAI(model=custom_model)
 
-    user_question = processed_output["user_question"]
-    actual_output = (processed_output["llm_question_result"],)
-    rweb_results = str(processed_output["rweb_results"])
-
     print("user_question: ", user_question)
     print("actual_output: ", actual_output)
     print("rweb_results: ", rweb_results)
@@ -128,4 +123,4 @@ def test_case(
 
     metric.measure(test_case)
 
-    return {"deepeval_score": metric.score, "deepevalscore_reason": metric.reason}
+    return {"deep_eval_score": metric.score, "deep_eval_score_reason": metric.reason}
22 changes: 13 additions & 9 deletions flows/reliefweb_chat/flow.dag.yaml
@@ -534,6 +534,8 @@ nodes:
     llm_question_result: ${answer_question.output}
     rweb_query: ${create_rweb_query.output}
     content_safety_result: ${content_safety.output.suggested_action}
+    deep_eval_score: ${deep_eval.output.deep_eval_score}
+    deep_eval_score_reason: ${deep_eval.output.deep_eval_score_reason}
 - name: extract_references
   type: python
   source:
@@ -564,15 +566,6 @@ nodes:
   inputs:
     connection: azure_content_safety_connection
     text: ${inputs.question}
-- name: deep_eval
-  type: python
-  source:
-    type: code
-    path: deep_eval.py
-  inputs:
-    processed_output: ${process_output.output}
-    conn: azure_openai
-    deployment_name: gpt-35-turbo-16k
 - name: concatenate_scores
   type: python
   source:
@@ -608,6 +601,17 @@ nodes:
   inputs:
     connection: azure_content_safety_connection
     text: ${inputs.question}
+- name: deep_eval
+  type: python
+  source:
+    type: code
+    path: deep_eval.py
+  inputs:
+    rweb_results: ${get_rweb_results.output}
+    user_question: ${inputs.question}
+    actual_output: ${answer_question.output}
+    conn: azure_openai
+    deployment_name: gpt-35-turbo-16k
 node_variants:
   summarize:
     default_variant_id: variant_0
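
Two things happen in flow.dag.yaml: the deep_eval node's inputs switch from the single processed_output: ${process_output.output} to three raw upstream references, and process_output gains the two deep_eval_score inputs — reversing the direction of the dependency between the two nodes. The node definition also moves later in the file, but in a promptflow DAG the execution order follows the ${...} input references, not the order in which nodes appear in the YAML.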
2 changes: 2 additions & 0 deletions flows/reliefweb_chat/process_output.py
@@ -14,6 +14,8 @@ def process_output(
     refs: str,
     llm_question_result: str,
     content_safety_result: str,
+    deep_eval_score: float,
+    deep_eval_score_reason: str,
 ) -> dict:
 
     # TODO Hack for bug where running full output generates different output compared to just running this node.
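
The rest of process_output is collapsed above; a minimal sketch of what the extended signature enables — passing the evaluation fields through to the combined dict. The real parameter list is longer than the visible hunk, and this body is an assumption, not the repo's code:

    def process_output(
        refs: str,
        llm_question_result: str,
        content_safety_result: str,
        deep_eval_score: float,
        deep_eval_score_reason: str,
    ) -> dict:
        # Sketch only: fold the new evaluation fields into the combined output.
        return {
            "refs": refs,
            "llm_question_result": llm_question_result,
            "content_safety_result": content_safety_result,
            "deep_eval_score": deep_eval_score,
            "deep_eval_score_reason": deep_eval_score_reason,
        }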
