Add additional step to pipeline to generate a metrics report #241

Open
wants to merge 3 commits into base: main
4 changes: 2 additions & 2 deletions eval/__init__.py
@@ -1,4 +1,4 @@
from .final import run_final_eval_op
from .final import generate_metrics_report_op, run_final_eval_op
from .mt_bench import run_mt_bench_op

__all__ = ["run_final_eval_op", "run_mt_bench_op"]
__all__ = ["run_final_eval_op", "run_mt_bench_op", "generate_metrics_report_op"]
61 changes: 52 additions & 9 deletions eval/final.py
@@ -1,15 +1,13 @@
# type: ignore
# pylint: disable=import-outside-toplevel,import-error

from kfp.dsl import Artifact, Output, component
from kfp.dsl import Metrics, Output, component

from utils.consts import RHELAI_IMAGE
from utils.consts import PYTHON_IMAGE, RHELAI_IMAGE


@component(base_image=RHELAI_IMAGE, install_kfp_package=False)
def run_final_eval_op(
mmlu_branch_output: Output[Artifact],
mt_bench_branch_output: Output[Artifact],
base_model_dir: str,
base_branch: str,
candidate_branch: str,
@@ -20,10 +18,13 @@ def run_final_eval_op(
candidate_model: str = None,
taxonomy_path: str = "/input/taxonomy",
sdg_path: str = "/input/sdg",
mmlu_branch_output_path: str = "/output/mmlu_branch",
mt_bench_branch_output_path: str = "/output/mt_bench_branch",
):
import json
import os
import subprocess
from pathlib import Path

import httpx
import torch
@@ -320,13 +321,18 @@ def find_node_dataset_directories(base_dir: str):
"report_title": "KNOWLEDGE EVALUATION REPORT",
"max_score": "1.0",
"model": candidate_model,
"model_score": round(overall_score, 2),
"trained_model_score": round(overall_score, 2),
"base_model": base_model_dir,
"base_model_score": round(base_overall_score, 2),
"summary": summary,
}

with open(mmlu_branch_output.path, "w", encoding="utf-8") as f:
if not os.path.exists(mmlu_branch_output_path):
os.makedirs(mmlu_branch_output_path)
mmlu_branch_output_file = (
Path(mmlu_branch_output_path) / "mmlu_branch_data.json"
)
with open(mmlu_branch_output_file, "w", encoding="utf-8") as f:
json.dump(mmlu_branch_data, f, indent=4)
else:
print("No MMLU tasks directories found, skipping MMLU_branch evaluation.")
@@ -464,11 +470,48 @@ def find_node_dataset_directories(base_dir: str):
"model": candidate_model,
"judge_model": judge_model_name,
"max_score": "10.0",
"overall_score": overall_score,
"base_overall_score": base_overall_score,
"trained_model_score": overall_score,
"base_model_score": base_overall_score,
"error_rate": error_rate,
"summary": summary,
}

with open(mt_bench_branch_output.path, "w", encoding="utf-8") as f:
if not os.path.exists(mt_bench_branch_output_path):
os.makedirs(mt_bench_branch_output_path)
mt_bench_branch_data_file = (
Path(mt_bench_branch_output_path) / "mt_bench_branch_data.json"
)
with open(
mt_bench_branch_data_file,
"w",
encoding="utf-8",
) as f:
json.dump(mt_bench_branch_data, f, indent=4)


@component(base_image=PYTHON_IMAGE, install_kfp_package=False)
def generate_metrics_report_op(
metrics: Output[Metrics],
):
import json

reports = {
"mt_bench": "/output/mt_bench_data.json",
"mt_bench_branch": "/output/mt_bench_branch/mt_bench_branch_data.json",
"mmlu_branch": "/output/mmlu_branch/mmlu_branch_data.json",
}

for report, file_name in reports.items():
with open(file_name, "r", encoding="utf-8") as f:
report_data = json.load(f)

if report == "mt_bench":
metrics.log_metric(f"{report}_best_model", report_data["best_model"])
metrics.log_metric(f"{report}_best_score", report_data["best_score"])
else:
metrics.log_metric(
f"{report}_trained_model_score", report_data["trained_model_score"]
)
metrics.log_metric(
f"{report}_base_model_score", report_data["base_model_score"]
)
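
For reviewers who want to sanity-check the new metrics step without a cluster, here is a rough local sketch of the same read-and-log loop. The report paths and field names are taken from this diff; `FakeMetrics` and the sample scores are hypothetical stand-ins for `kfp.dsl.Metrics` and the real evaluation outputs.

```python
import json
import tempfile
from pathlib import Path


class FakeMetrics:
    """Hypothetical stand-in for kfp.dsl.Metrics so the loop can run locally."""

    def log_metric(self, name: str, value):
        print(f"{name} = {value}")


with tempfile.TemporaryDirectory() as output_dir:
    out = Path(output_dir)
    (out / "mt_bench_branch").mkdir()
    (out / "mmlu_branch").mkdir()

    # Minimal sample reports containing only the fields generate_metrics_report_op reads.
    (out / "mt_bench_data.json").write_text(
        json.dumps({"best_model": "candidate_model", "best_score": 7.2})
    )
    (out / "mt_bench_branch" / "mt_bench_branch_data.json").write_text(
        json.dumps({"trained_model_score": 7.1, "base_model_score": 6.8})
    )
    (out / "mmlu_branch" / "mmlu_branch_data.json").write_text(
        json.dumps({"trained_model_score": 0.62, "base_model_score": 0.55})
    )

    reports = {
        "mt_bench": out / "mt_bench_data.json",
        "mt_bench_branch": out / "mt_bench_branch" / "mt_bench_branch_data.json",
        "mmlu_branch": out / "mmlu_branch" / "mmlu_branch_data.json",
    }

    metrics = FakeMetrics()
    for report, file_name in reports.items():
        with open(file_name, "r", encoding="utf-8") as f:
            report_data = json.load(f)

        if report == "mt_bench":
            metrics.log_metric(f"{report}_best_model", report_data["best_model"])
            metrics.log_metric(f"{report}_best_score", report_data["best_score"])
        else:
            metrics.log_metric(
                f"{report}_trained_model_score", report_data["trained_model_score"]
            )
            metrics.log_metric(
                f"{report}_base_model_score", report_data["base_model_score"]
            )
```

Note that the real component hard-codes the `/output/...` paths, so it only finds the reports when the same output PVC is mounted at `/output`, as the pipeline changes below arrange.
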
15 changes: 8 additions & 7 deletions eval/mt_bench.py
@@ -17,7 +17,6 @@ def run_mt_bench_op(
max_workers: str,
models_folder: str,
output_path: str = "/output/mt_bench_data.json",
best_score_file: Optional[str] = None,
) -> NamedTuple("outputs", best_model=str, best_score=float):
import json
import os
@@ -188,15 +187,17 @@ def shutdown_vllm(process: subprocess.Popen, timeout: int = 20):
all_mt_bench_data.append(mt_bench_data)
scores[model_path] = overall_score

with open(output_path, "w", encoding="utf-8") as f:
json.dump(all_mt_bench_data, f, indent=4)

outputs = NamedTuple("outputs", best_model=str, best_score=float)
best_model = max(scores, key=scores.get)
best_score = scores[best_model]
if best_score_file:
with open(best_score_file, "w", encoding="utf-8") as f:
json.dump({"best_model": best_model, "best_score": best_score}, f, indent=4)
mt_bench_report = {
"best_model": best_model,
"best_score": best_score,
"reports": all_mt_bench_data,
}

with open(output_path, "w", encoding="utf-8") as f:
json.dump(mt_bench_report, f, indent=4)

# Rename the best model directory to "candidate_model" for the next step
# So we know which model to use for the final evaluation
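
With `best_score_file` removed, anything that previously consumed that file can read the same values from the consolidated report written to `output_path`. A minimal sketch of such a consumer, assuming the output PVC is mounted at `/output` as it is in this pipeline:

```python
import json

# Path written by run_mt_bench_op in this PR; adjust if /output is mounted elsewhere.
output_path = "/output/mt_bench_data.json"

with open(output_path, "r", encoding="utf-8") as f:
    mt_bench_report = json.load(f)

# The top-level fields replace the old best_score_file contents;
# the per-model results now live under "reports".
print(mt_bench_report["best_model"], mt_bench_report["best_score"])
for model_report in mt_bench_report["reports"]:
    print(model_report)
```
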
47 changes: 43 additions & 4 deletions pipeline.py
@@ -17,7 +17,7 @@
use_secret_as_volume,
)

from eval import run_final_eval_op, run_mt_bench_op
from eval import generate_metrics_report_op, run_final_eval_op, run_mt_bench_op
from sdg import (
git_clone_op,
sdg_op,
@@ -33,7 +33,9 @@
from utils import (
ilab_importer_op,
model_to_pvc_op,
pvc_to_mmlu_branch_op,
pvc_to_model_op,
pvc_to_mt_bench_branch_op,
pvc_to_mt_bench_op,
)
from utils.consts import RHELAI_IMAGE
@@ -424,9 +426,28 @@ def ilab_pipeline(
mount_path="/output",
)

output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
output_pvc_delete_task.after(
output_model_task, output_mt_bench_task, final_eval_task
output_mt_bench_branch_task = pvc_to_mt_bench_branch_op(
pvc_path="/output/mt_bench_branch/mt_bench_branch_data.json",
)
output_mt_bench_branch_task.after(final_eval_task)
output_mt_bench_branch_task.set_caching_options(False)

mount_pvc(
task=output_mt_bench_branch_task,
pvc_name=output_pvc_task.output,
mount_path="/output",
)

output_mmlu_branch_task = pvc_to_mmlu_branch_op(
pvc_path="/output/mmlu_branch/mmlu_branch_data.json",
)
output_mmlu_branch_task.after(final_eval_task)
output_mmlu_branch_task.set_caching_options(False)

mount_pvc(
task=output_mmlu_branch_task,
pvc_name=output_pvc_task.output,
mount_path="/output",
)

sdg_pvc_delete_task = DeletePVC(pvc_name=sdg_input_pvc_task.output)
@@ -435,6 +456,24 @@
model_pvc_delete_task = DeletePVC(pvc_name=model_pvc_task.output)
model_pvc_delete_task.after(final_eval_task)

generate_metrics_report_task = generate_metrics_report_op()
generate_metrics_report_task.after(final_eval_task)
generate_metrics_report_task.set_caching_options(False)
mount_pvc(
task=generate_metrics_report_task,
pvc_name=output_pvc_task.output,
mount_path="/output",
)

output_pvc_delete_task = DeletePVC(pvc_name=output_pvc_task.output)
output_pvc_delete_task.after(
output_model_task,
output_mt_bench_task,
output_mmlu_branch_task,
output_mt_bench_branch_task,
generate_metrics_report_task,
Comment on lines +472 to +474

Member:
I'm sorry to be so stubborn here. 😄 I still think this is not a correct list of dependencies. I think it should be:

Suggested change
-    output_mmlu_branch_task,
-    output_mt_bench_branch_task,
-    generate_metrics_report_task,
+    output_model_task,
+    output_mt_bench_task,
+    output_mmlu_branch_task,
+    output_mt_bench_branch_task,
+    generate_metrics_report_task,

Collaborator Author:
I appreciate your stubbornness on this PR 😃 Change made.

)

return

