
Commit

update: examples
soumik12345 committed Oct 30, 2024
1 parent 0b6ee20 commit fcd7178
Showing 2 changed files with 13 additions and 35 deletions.
examples/multimodal_llm_eval/evaluate_mllm_metric_action.py (23 changes: 6 additions & 17 deletions)
@@ -1,13 +1,12 @@
+import asyncio
 from typing import Optional

 import fire
 import weave

-import wandb
-from hemm.eval_pipelines import EvaluationPipeline
 from hemm.metrics.vqa import MultiModalLLMEvaluationMetric
 from hemm.metrics.vqa.judges.mmllm_judges import OpenAIJudge, PromptCategory
-from hemm.models import BaseDiffusionModel
+from hemm.models import DiffusersModel


 def main(
@@ -21,38 +20,28 @@ def main(
     image_height: int = 1024,
     image_width: int = 1024,
     num_inference_steps: int = 50,
-    mock_inference_dataset_address: Optional[str] = None,
-    save_inference_dataset_name: Optional[str] = None,
 ):
-    wandb.init(project=project, entity=entity, job_type="evaluation")
     weave.init(project_name=f"{entity}/{project}")

     dataset = weave.ref(dataset_ref).get()
     dataset = dataset.rows[:dataset_limit] if dataset_limit else dataset

-    diffusion_model = BaseDiffusionModel(
+    model = DiffusersModel(
         diffusion_model_name_or_path=diffusion_model_address,
         enable_cpu_offfload=diffusion_model_enable_cpu_offfload,
         image_height=image_height,
         image_width=image_width,
         num_inference_steps=num_inference_steps,
     )
-    diffusion_model._pipeline.set_progress_bar_config(disable=True)
-    evaluation_pipeline = EvaluationPipeline(
-        model=diffusion_model,
-        mock_inference_dataset_address=mock_inference_dataset_address,
-        save_inference_dataset_name=save_inference_dataset_name,
-    )
+    model._pipeline.set_progress_bar_config(disable=True)

     judge = OpenAIJudge(
         prompt_property=PromptCategory.action, openai_model=openai_judge_model
     )
     metric = MultiModalLLMEvaluationMetric(judge=judge)
-    evaluation_pipeline.add_metric(metric)

-    evaluation_pipeline(dataset=dataset)
-    wandb.finish()
-    evaluation_pipeline.cleanup()
+    evaluation = weave.Evaluation(dataset=dataset, scorers=[metric])
+    asyncio.run(evaluation.evaluate(model))


 if __name__ == "__main__":
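Both updated examples follow the same pattern: the diffusion model is wrapped as a DiffusersModel, the metric is handed to weave.Evaluation as a scorer, and the asynchronous run is driven with asyncio.run(evaluation.evaluate(model)). For reference, below is a minimal, self-contained sketch of that weave.Evaluation flow with a hypothetical toy model and scorer standing in for the HEMM objects; the project name, class, and function names are placeholders, and the scorer's model-output parameter name can differ by Weave version.

```python
import asyncio

import weave


class ToyImageModel(weave.Model):
    # Hypothetical stand-in for DiffusersModel, used only to illustrate the flow.
    @weave.op()
    def predict(self, prompt: str) -> dict:
        # A real model would run a diffusion pipeline here.
        return {"image": f"<image generated for: {prompt}>"}


@weave.op()
def mentions_prompt(prompt: str, output: dict) -> dict:
    # Hypothetical scorer standing in for MultiModalLLMEvaluationMetric.
    # Scorers receive dataset columns plus the model output; the output
    # parameter is named `output` (or `model_output` in older Weave versions).
    return {"mentions_prompt": prompt in output["image"]}


if __name__ == "__main__":
    weave.init(project_name="my-entity/my-project")  # placeholder project
    dataset = [{"prompt": "a cat playing piano"}, {"prompt": "a dog surfing"}]
    evaluation = weave.Evaluation(dataset=dataset, scorers=[mentions_prompt])
    # Evaluation.evaluate is async, hence asyncio.run, matching the examples above.
    asyncio.run(evaluation.evaluate(ToyImageModel()))
```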
examples/multimodal_llm_eval/evaluate_mllm_metric_complex.py (25 changes: 7 additions & 18 deletions)
@@ -1,13 +1,12 @@
+import asyncio
 from typing import Optional

 import fire
 import weave

-import wandb
-from hemm.eval_pipelines import EvaluationPipeline
 from hemm.metrics.vqa import MultiModalLLMEvaluationMetric
 from hemm.metrics.vqa.judges.mmllm_judges import OpenAIJudge, PromptCategory
-from hemm.models import BaseDiffusionModel
+from hemm.models import DiffusersModel


 def main(
@@ -21,38 +20,28 @@ def main(
     image_height: int = 1024,
     image_width: int = 1024,
     num_inference_steps: int = 50,
-    mock_inference_dataset_address: Optional[str] = None,
-    save_inference_dataset_name: Optional[str] = None,
 ):
-    wandb.init(project=project, entity=entity, job_type="evaluation")
     weave.init(project_name=f"{entity}/{project}")

     dataset = weave.ref(dataset_ref).get()
     dataset = dataset.rows[:dataset_limit] if dataset_limit else dataset

-    diffusion_model = BaseDiffusionModel(
+    model = DiffusersModel(
         diffusion_model_name_or_path=diffusion_model_address,
         enable_cpu_offfload=diffusion_model_enable_cpu_offfload,
         image_height=image_height,
         image_width=image_width,
         num_inference_steps=num_inference_steps,
     )
-    diffusion_model._pipeline.set_progress_bar_config(disable=True)
-    evaluation_pipeline = EvaluationPipeline(
-        model=diffusion_model,
-        mock_inference_dataset_address=mock_inference_dataset_address,
-        save_inference_dataset_name=save_inference_dataset_name,
-    )
+    model._pipeline.set_progress_bar_config(disable=True)

     judge = OpenAIJudge(
-        prompt_property=PromptCategory.action, openai_model=openai_judge_model
+        prompt_property=PromptCategory.complex, openai_model=openai_judge_model
     )
     metric = MultiModalLLMEvaluationMetric(judge=judge)
-    evaluation_pipeline.add_metric(metric)

-    evaluation_pipeline(dataset=dataset)
-    wandb.finish()
-    evaluation_pipeline.cleanup()
+    evaluation = weave.Evaluation(dataset=dataset, scorers=[metric])
+    asyncio.run(evaluation.evaluate(model))


 if __name__ == "__main__":
