
Commit

update yaml and utils
JvThunder committed Mar 15, 2024

Verified: this commit was created on GitHub.com and signed with GitHub's verified signature.
1 parent 1fdfb3c commit a92cc3a
Showing 4 changed files with 35 additions and 39 deletions.
lmms_eval/tasks/olympiadbench/olympiadbench.yaml (6 changes: 4 additions & 2 deletions)

@@ -1,3 +1,5 @@
-group: flickr30k
+group: olympiadbench
 task:
-- flickr30k_test
+- olympiadbench_test
+metadata:
+  - version: 0.0
lmms_eval/tasks/olympiadbench/olympiadbench_evals.py (4 changes: 2 additions & 2 deletions)

@@ -5,13 +5,13 @@
 import math
 
 # how to use
-# scorer = AutoScoringJudge()
+# scorer = OlympiadBenchEvaluator()
 # exp1 = "10^{10^{10^{10}}}"
 # exp2 = "10^{10}"
 # precision = 1e-4
 # res = scorer.judge(exp1, exp2, precision)
 
-class AutoScoringJudge:
+class OlympiadBenchEvaluator:
     def __init__(self):
         # Map of special symbols to their replacements
         self.special_signal_map = {
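
The "how to use" comment above implies the rename is purely cosmetic: the judge() interface is unchanged. A minimal usage sketch based on that comment (the boolean return value is an assumption, not confirmed by the diff):

# Sketch based on the usage comment in olympiadbench_evals.py; assumes
# judge() returns a boolean verdict on whether the two LaTeX expressions
# agree within the given numerical precision.
from lmms_eval.tasks.olympiadbench.olympiadbench_evals import OlympiadBenchEvaluator

scorer = OlympiadBenchEvaluator()
exp1 = "10^{10^{10^{10}}}"
exp2 = "10^{10}"
precision = 1e-4
res = scorer.judge(exp1, exp2, precision)
print(res)  # expected False: the two expressions clearly differ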
lmms_eval/tasks/olympiadbench/olympiadbench_test.yaml (23 changes: 15 additions & 8 deletions)

@@ -1,25 +1,32 @@
 dataset_path: lmms-lab/OlympiadBench
 dataset_kwargs:
   token: True
-task : "olympiad_bench"
+task : "olympiadbench_test"
 test_split: test
 output_type: generate_until
 doc_to_visual: !function utils.olympiadbench_doc_to_visual
 doc_to_text: !function utils.olympiadbench_doc_to_text
 doc_to_target: "answer"
 generation_kwargs:
-  max_new_tokens: 64
-  until:
-    - "ASSISTANT:"
+  max_new_tokens: 1024
+  temperature: 0
+  top_p: 0
+  num_beams: 1
+  do_sample: false
-process_results: !function utils.olympiadbench_process_result
+process_results: !function utils.olympiadbench_process_results
 metric_list:
-  - metric: human_eval
-    aggregation: !function utils.human_eval
-    higher_is_better: True
   - metric: submission
     aggregation: !function utils.mathvista_aggregate_results
     higher_is_better: true
+  - metric: auto_scoring
+    aggregation: !function utils.auto_scoring
+    higher_is_better: True
 metadata:
   - version: 0.0
 
+model_specific_prompt_kwargs:
+  default:
+    shot_type: "format-prompt" # can be "reason-first", "solution", "step-by-step"
+model_specific_generation_kwargs:
+  llava:
+    image_aspect_ratio: original
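
The new model_specific_prompt_kwargs block only takes effect once utils.olympiadbench_doc_to_text consumes it; in other lmms-eval tasks the harness passes that mapping to doc_to_text as a second argument. This commit leaves doc_to_text untouched (it still returns the caption prompt copied from flickr30k, see the utils.py diff below), so the following is only a sketch of how shot_type might later be honored; the doc["question"] field and the prompt wording are assumptions:

# Assumed wiring: lmms-eval passes the per-model prompt kwargs as a second
# argument to doc_to_text, as other tasks in this repo do.
def olympiadbench_doc_to_text(doc, model_specific_prompt_kwargs=None):
    kwargs = model_specific_prompt_kwargs or {}
    question = doc["question"]  # field name is an assumption
    if kwargs.get("shot_type", "format-prompt") == "step-by-step":
        return f"{question}\nPlease reason step by step."
    return question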
lmms_eval/tasks/olympiadbench/utils.py (41 changes: 14 additions & 27 deletions)

@@ -1,43 +1,30 @@
 import os
 import json
-from pycocoevalcap.eval import COCOEvalCap, Bleu, Meteor, Rouge, Cider, Spice
-from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
-from pycocotools.coco import COCO
-from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
+import datetime
+from lmms_eval.tasks.olympiadbench.olympiadbench_evals import OlympiadBenchEvaluator
+from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
 
 import logging
 
 eval_logger = logging.getLogger("lmms-eval")
 
 dir_name = os.path.dirname(os.path.abspath(__file__))
 
+olympiadbench_evaluator = OlympiadBenchEvaluator()
 
 def olympiadbench_doc_to_visual(doc):
-    return [doc["image"].convert("RGB")]
+    return [image.convert("RGB") for image in doc["images"]]
 
 def olympiadbench_doc_to_text(doc):
     # question = "Please carefully observe the image and come up with a caption for the image"
     return f"Provide a one-sentence caption for the provided image."
 
-def olympiadbench_process_result(doc, result):
-    """
-    Args:
-        doc: a instance of the eval dataset
-        results: [pred]
-    Returns:
-        a dictionary with key: metric name, value: metric value
-    """
-    pred = result[0] if len(result) > 0 else ""
-    image_id = int(doc["img_id"])
-
-    data_dict = {"answer": doc["caption"], "pred": pred, "image_id": image_id}
-
-    return {f"flickr_{metric}": data_dict for metric in FLICKR_METRICS}
+def olympiadbench_process_results(doc, result):
+    problem = {
+        "question_type": doc["question_type"],
+        "answer_type": doc["answer_type"]
+    }
+    pass
 
+def olympiadbench_aggregation_results(results, metric, args):
+    pass
 
-def olympiadbench_aggregation_result(results, metric, args):
+def auto_scoring(results, metric, args):
     pass
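
The module now instantiates olympiadbench_evaluator at import time but never calls it; the hooks the yaml references are still pass stubs. One plausible way they could be filled in later is to route predictions through judge(), as a sketch only, not this commit's code: the 1e-4 precision is carried over from the usage comment in olympiadbench_evals.py, doc["answer"] follows the yaml's doc_to_target, and the metric-dict shape is assumed from how lmms-eval tasks usually report per-sample results.

# Hypothetical completion of the stubs above; not part of commit a92cc3a.
def olympiadbench_process_results(doc, result):
    pred = result[0] if len(result) > 0 else ""
    # judge(exp1, exp2, precision), per the usage comment in olympiadbench_evals.py
    is_correct = olympiadbench_evaluator.judge(pred, doc["answer"], 1e-4)
    return {"auto_scoring": {"correct": is_correct}}

def auto_scoring(results, metric, args):
    # Aggregate per-sample records into the fraction judged correct.
    return sum(1 for r in results if r["correct"]) / max(len(results), 1)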
