
Commit

update: data structure in OpenAIJudge.frame_question
soumik12345 committed Oct 30, 2024
1 parent dc9579e commit 9ceb6ec
Showing 5 changed files with 46 additions and 42 deletions.
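
In short, this commit swaps the pydantic JudgeQuestion model built inside OpenAIJudge.frame_question for a plain dict, so downstream code moves from attribute access to key access. A minimal runnable sketch of the shape change (abbreviated prompt text, not lines from the diff; key spelling as in the code):

prompt = "a cat to the left of a dog"

# Before (old code): pydantic model with attribute access
#     question = JudgeQuestion(judgement_question=f"... {prompt} ...")
#     text = question.judgement_question

# After (this commit): plain dict with key access
question = {"judgement_question": f"Evaluate the image against: {prompt}"}
text = question["judgement_question"]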
hemm/metrics/__init__.py (7 changes: 5 additions & 2 deletions)
@@ -1,4 +1,7 @@
-from .prompt_alignment import (BLIPScoreMertric, CLIPImageQualityScoreMetric,
-                               CLIPScoreMetric)
+from .prompt_alignment import (
+    BLIPScoreMertric,
+    CLIPImageQualityScoreMetric,
+    CLIPScoreMetric,
+)

__all__ = ["BLIPScoreMertric", "CLIPImageQualityScoreMetric", "CLIPScoreMetric"]
hemm/metrics/image_quality/lpips.py (3 changes: 1 addition & 2 deletions)
@@ -5,8 +5,7 @@
import torch
import weave
from PIL import Image
-from torchmetrics.functional.image import \
-    learned_perceptual_image_patch_similarity
+from torchmetrics.functional.image import learned_perceptual_image_patch_similarity

from ...utils import base64_encode_image
from .base import BaseImageQualityMetric, ComputeMetricOutput
hemm/metrics/vqa/judges/mmllm_judges/openai_judge.py (71 changes: 36 additions & 35 deletions)
@@ -1,6 +1,6 @@
import os
import subprocess
-from typing import List
+from typing import Dict, List

import spacy
import weave
@@ -9,8 +9,7 @@
from pydantic import BaseModel

from .....utils import base64_encode_image
-from .commons import (JudgeMent, JudgeQuestion, PromptCategory,
-                      TaggedPromptParts)
+from .commons import JudgeMent, JudgeQuestion, PromptCategory, TaggedPromptParts


class OpenAIJudgeMent(BaseModel):
@@ -92,7 +91,7 @@ def extract_prompt_parts(self, prompt: str) -> List[TaggedPromptParts]:
        return tagged_prompt_parts

    @weave.op()
-    def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]:
+    def frame_question(self, prompt: str, image: Image.Image) -> List[Dict[str, str]]:
        """Frame the question corresponding to the given prompt and image for
        the chain-of-thought system of judgement.
@@ -101,21 +100,21 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
            image (Image.Image): The image to frame the question for.
        Returns:
-            List[JudgeQuestion]: List of questions to ask for the given prompt.
+            List[Dict[str, str]]: List of questions to ask for the given prompt.
        """
        prompt = str(prompt)
        if self.prompt_property in [PromptCategory.spatial, PromptCategory.spatial_3d]:
            self._total_score = 5
-            question = JudgeQuestion(
-                image_desciption_system_prompt="""
+            question = {
+                "image_desciption_system_prompt": """
                You are a helpful assistant meant to describe images in detail.
                You should pay special attention to the objects and their spatial layout in the image.
                """,
-                judgement_question_system_prompt="""
+                "judgement_question_system_prompt": """
                You are a helpful assistant meant to identify objects and their spatial layout in the image.
                You have to extract the question, the score, and the explanation from the user's response.
                """,
-                judgement_question=f"""
+                "judgement_question": f"""
                Looking at the image and given a detailed description of the image, evaluate if the text \"{prompt}\" is correctly portrayed in the image.
                Give a score from 1 to 5, according to the following criteria:
@@ -135,20 +134,20 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
                3. The spatial layout of the objects in the image should be consistent with the text prompt. You should deduct 1 point from the score if the
                spatial layout of the objects in the image is not consistent with the text prompt.
                """,
-            )
+            }
            return [(question, image)]
        elif self.prompt_property == PromptCategory.action:
            self._total_score = 5
-            question = JudgeQuestion(
-                image_desciption_system_prompt="""
+            question = {
+                "image_desciption_system_prompt": """
                You are a helpful assistant meant to describe images in detail.
                You should pay special attention to the actions, events, objects and their relationships in the image.
                """,
-                judgement_question_system_prompt="""
+                "judgement_question_system_prompt": """
                You are a helpful assistant meant to identify the actions, events, objects and their relationships in the image.
                You have to extract the question, the score, and the explanation from the user's response.
                """,
-                judgement_question=f"""
+                "judgement_question": f"""
                Looking at the image and given a detailed description of the image, evaluate if the text \"{prompt}\" is correctly portrayed in the image.
                Give a score from 1 to 5, according to the following criteria:
@@ -168,20 +167,20 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
                3. The spatial layout of the objects in the image should be consistent with the text prompt. You should deduct 1 point from the score if the
                spatial layout of the objects in the image is not consistent with the text prompt.
                """,
-            )
+            }
            return [(question, image)]
        elif self.prompt_property == PromptCategory.numeracy:
            self._total_score = 5
-            question = JudgeQuestion(
-                image_desciption_system_prompt="""
+            question = {
+                "image_desciption_system_prompt": """
                You are a helpful assistant meant to describe images in detail.
                You should pay special attention to the objects and their quantities in the image.
                """,
-                judgement_question_system_prompt="""
+                "judgement_question_system_prompt": """
                You are a helpful assistant meant to identify objects and their quantities in the image.
                You have to extract the question, the score, and the explanation from the user's response.
                """,
-                judgement_question=f"""
+                "judgement_question": f"""
                Looking at the image and given a detailed description of the image, evaluate how well the image aligns with the text prompt: \"{prompt}\"
                Give a score from 1 to 5, according to the following criteria:
@@ -201,23 +200,23 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
                3. The spatial layout of the objects in the image should be consistent with the text prompt. You should deduct 1 point from the score if the
                spatial layout of the objects in the image is not consistent with the text prompt.
                """,
-            )
+            }
            return [(question, image)]
        elif self.prompt_property == PromptCategory.complex:
            self._total_score = 5
-            question = JudgeQuestion(
-                image_desciption_system_prompt="""
+            question = {
+                "image_desciption_system_prompt": """
                You are a helpful assistant meant to describe images in detail.
                You should pay special attention to the objects in the image and their attributes
                (such as color, shape, texture), spatial layout and action relationships.
                """,
-                judgement_question_system_prompt="""
+                "judgement_question_system_prompt": """
                You are a helpful assistant meant to evaluate the correspondence of the image to a given text prompt.
                Focus on the objects in the image and their attributes (such as color, shape, texture),
                spatial layout and action relationships. You have to extract the question, the score, and the
                explanation from the user's response.
                """,
-                judgement_question=f"""
+                "judgement_question": f"""
                Looking at the image and given a detailed description of the image, evaluate how well the image aligns with the text prompt: \"{prompt}\"
                Give a score from 1 to 5, according to the following criteria:
@@ -237,21 +236,21 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
                3. The spatial layout of the objects in the image should be consistent with the text prompt. You should deduct 1 point from the score if the
                spatial layout of the objects in the image is not consistent with the text prompt.
                """,
-            )
+            }
            return [(question, image)]
        tagged_prompt_parts = self.extract_prompt_parts(prompt)
        questions: List[str] = []
        for tagged_prompt_part in tagged_prompt_parts:
-            question = JudgeQuestion(
-                image_desciption_system_prompt=f"""
+            question = {
+                "image_desciption_system_prompt": f"""
                You are a helpful assistant meant to describe images in detail.
                You should pay special attention to any objects and their {self.prompt_property.name} in the given image.
                """,
-                judgement_question_system_prompt=f"""
+                "judgement_question_system_prompt": f"""
                You are a helpful assistant meant to identify any objects and their {self.prompt_property.name}
                in the given image. You have to extract the question, the score, and the explanation from the user's response.
                """,
-                judgement_question=f"""
+                "judgement_question": f"""
                Looking at the image and given a detailed description of the image, evaluate if there is a {tagged_prompt_part.entity} in the image.
                Give a score from 1 to 4, according to the following criteria:
@@ -270,13 +269,13 @@ def frame_question(self, prompt: str, image: Image.Image) -> List[JudgeQuestion]
                3. The spatial layout of the objects in the image should be consistent with the text prompt. You should deduct 1 point from the score if the
                spatial layout of the objects in the image is not consistent with the text prompt.
                """,
-            )
+            }
            questions.append((question, image))
        return questions

    @weave.op
    def execute_chain_of_thought(
-        self, question: JudgeQuestion, image: Image.Image
+        self, question: Dict[str, str], image: Image.Image
    ) -> OpenAIJudgeMent:
        image_description_explanation = (
            self._openai_client.chat.completions.create(
@@ -285,7 +284,7 @@ def execute_chain_of_thought(
                messages=[
                    {
                        "role": "system",
-                        "content": question.image_desciption_system_prompt,
+                        "content": question["image_desciption_system_prompt"],
                    },
                    {
                        "role": "user",
@@ -301,7 +300,9 @@ def execute_chain_of_thought(
            .choices[0]
            .message.content
        )
-        question.judgement_question += f"""
+        question[
+            "judgement_question"
+        ] += f"""
Here is a detailed explanation of the image:
---
@@ -318,12 +319,12 @@ def execute_chain_of_thought(
                messages=[
                    {
                        "role": "system",
-                        "content": question.judgement_question_system_prompt,
+                        "content": question["judgement_question_system_prompt"],
                    },
                    {
                        "role": "user",
                        "content": [
-                            {"type": "text", "text": question.judgement_question},
+                            {"type": "text", "text": question["judgement_question"]},
                            {
                                "type": "image_url",
                                "image_url": {"url": base64_encode_image(image)},
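
Taken together, the openai_judge.py changes mean frame_question now builds each question as a plain dict whose keys execute_chain_of_thought indexes directly. A runnable sketch of the new data flow (prompt text abbreviated and the OpenAI call omitted; only the three dict keys, including the misspelled "image_desciption_system_prompt", and the (question, image) pairing come from this diff):

from typing import Dict, List, Tuple

from PIL import Image

# Question structure introduced by this commit (keys spelled as in the diff).
question: Dict[str, str] = {
    "image_desciption_system_prompt": "You are a helpful assistant ...",
    "judgement_question_system_prompt": "You are a helpful assistant ...",
    "judgement_question": "Evaluate how well the image aligns with the prompt.",
}

image = Image.new("RGB", (64, 64))

# frame_question returns (question, image) pairs, even though the new
# annotation reads List[Dict[str, str]].
questions: List[Tuple[Dict[str, str], Image.Image]] = [(question, image)]

for q, img in questions:
    # execute_chain_of_thought first appends the model-generated image
    # description to the judgement question, then sends it for judgement.
    q["judgement_question"] += "\nHere is a detailed explanation of the image:\n---\n..."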
hemm/tests/test_2d_spatial_relationship_eval.py (4 changes: 3 additions & 1 deletion)
@@ -6,7 +6,9 @@
from hemm.eval_pipelines import BaseDiffusionModel, EvaluationPipeline
from hemm.metrics.spatial_relationship import SpatialRelationshipMetric2D
from hemm.metrics.spatial_relationship.judges import (
-    DETRSpatialRelationShipJudge, RTDETRSpatialRelationShipJudge)
+    DETRSpatialRelationShipJudge,
+    RTDETRSpatialRelationShipJudge,
+)


class Test2DSpatialRelationshipEval(unittest.TestCase):
hemm/tests/test_prompt_alignment_eval.py (3 changes: 1 addition & 2 deletions)
@@ -4,8 +4,7 @@

import wandb
from hemm.eval_pipelines import BaseDiffusionModel, EvaluationPipeline
-from hemm.metrics.prompt_alignment import (CLIPImageQualityScoreMetric,
-                                           CLIPScoreMetric)
+from hemm.metrics.prompt_alignment import CLIPImageQualityScoreMetric, CLIPScoreMetric


class TestPromptAlignmentEvaluation(unittest.TestCase):
