Commit 3e59880

update: vqa metrics docs
soumik12345 committed Oct 30, 2024
1 parent 1b2f817 commit 3e59880
Showing 2 changed files with 20 additions and 22 deletions.
14 changes: 9 additions & 5 deletions docs/metrics/vqa/disentangled_vqa.md
@@ -33,25 +33,29 @@ This module aims to implement the Disentangled VQA metric inspired by Section 4.
## Step 2: Evaluate

```diff
-import wandb
+import asyncio
 
 import weave
 
-wandb.init(project=project, entity=entity, job_type="evaluation")
 from hemm.metrics.vqa import DisentangledVQAMetric
 from hemm.metrics.vqa.judges import BlipVQAJudge
+from hemm.models import DiffusersModel
 
 weave.init(project_name=project)
 
-diffusion_model = BaseDiffusionModel(
+diffusion_model = DiffusersModel(
     diffusion_model_name_or_path=diffusion_model_address,
     enable_cpu_offfload=diffusion_model_enable_cpu_offfload,
     image_height=image_size[0],
     image_width=image_size[1],
 )
-evaluation_pipeline = EvaluationPipeline(model=diffusion_model)
 
 judge = BlipVQAJudge()
 metric = DisentangledVQAMetric(judge=judge, name="disentangled_blip_metric")
-evaluation_pipeline.add_metric(metric)
 
-evaluation_pipeline(dataset=dataset)
+evaluation = weave.Evaluation(dataset=dataset, scorers=[metric])
+asyncio.run(evaluation.evaluate(diffusion_model))
```
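
The updated snippet assumes that `project`, `diffusion_model_address`, `diffusion_model_enable_cpu_offfload`, `image_size`, and `dataset` are defined elsewhere. A minimal sketch of one way to set them up is shown below; the specific values and the `"prompt"` row field are illustrative assumptions, not part of this commit.

```python
# Illustrative setup for the names referenced above (all values are assumptions).
project = "disentangled-vqa-eval"  # Weave project to log the evaluation to
diffusion_model_address = "stabilityai/stable-diffusion-2-1"  # any diffusers checkpoint
diffusion_model_enable_cpu_offfload = False  # spelling follows the snippet above
image_size = (512, 512)  # (height, width) passed to the model wrapper

# weave.Evaluation accepts either a published dataset reference or an
# in-memory list of rows; the "prompt" field is an assumed row schema.
dataset = [
    {"prompt": "a red cube on top of a blue sphere"},
    {"prompt": "a black and white cat sitting on a wooden chair"},
]
```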

## Metrics
28 changes: 11 additions & 17 deletions docs/metrics/vqa/multi_modal_llm.md
@@ -22,31 +22,25 @@ This module aims to implement the Multi-modal LLM based metric inspired by
```
Finally, you can run the following snippet to evaluate your model:
```diff
-import wandb
+import asyncio
 
 import weave
 
-from hemm.eval_pipelines import BaseDiffusionModel, EvaluationPipeline
 from hemm.metrics.vqa import MultiModalLLMEvaluationMetric
-from hemm.metrics.vqa.judges.mmllm_judges import OpenAIJudge, PromptCategory
-
-wandb.init(project="mllm-eval", job_type="evaluation")
-weave.init(project_name="mllm-eval")
+from hemm.metrics.vqa.judges.mmllm_judges import OpenAIJudge
+from hemm.models import DiffusersModel
 
-dataset = weave.ref(dataset_ref).get()
+weave.init(project_name="hemm-eval/mllm-eval")
 
-diffusion_model = BaseDiffusionModel(
+model = DiffusersModel(
     diffusion_model_name_or_path="stabilityai/stable-diffusion-2-1",
     enable_cpu_offfload=False,
-    image_height=512,
-    image_width=512,
+    image_height=1024,
+    image_width=1024,
 )
-evaluation_pipeline = EvaluationPipeline(model=diffusion_model)
 
-judge = OpenAIJudge(prompt_property=PromptCategory.complex)
-metric = MultiModalLLMEvaluationMetric(judge=judge)
-evaluation_pipeline.add_metric(metric)
+metric = MultiModalLLMEvaluationMetric(judge=OpenAIJudge())
 
-evaluation_pipeline(dataset=dataset)
+evaluation = weave.Evaluation(dataset=weave.ref("Dataset:v2").get(), scorers=[metric])
+asyncio.run(evaluation.evaluate(model))
```
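
The updated snippet fetches its evaluation data by reference, so a dataset named `Dataset` must already be published in the `hemm-eval/mllm-eval` project, and the OpenAI-backed judge needs API credentials. A rough sketch of that setup is below; the prompt rows, the `"prompt"` field name, and reading the key from `OPENAI_API_KEY` are assumptions rather than details documented in this commit.

```python
import os

import weave

# Assumed: the OpenAI-backed judge reads credentials from the environment.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")  # placeholder; use real key management

weave.init(project_name="hemm-eval/mllm-eval")

# Publish a small prompt dataset; the name "Dataset" matches the reference
# used above, and the "prompt" field is an assumed row schema.
weave.publish(
    weave.Dataset(
        name="Dataset",
        rows=[
            {"prompt": "a photo of an astronaut riding a horse on the moon"},
            {"prompt": "two cups of coffee on a marble table, soft morning light"},
        ],
    )
)
```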

## Metrics
Expand Down
