From e1fbd2cf83f7684bddb08a772524b4222f6ee87c Mon Sep 17 00:00:00 2001 From: junewgl <45283002+junewgl@users.noreply.github.com> Date: Wed, 6 Dec 2023 17:56:27 +0800 Subject: [PATCH] perf: output colored table and update model baseline API (#166) --- dbgpt_hub/baseline/__init__.py | 5 +- dbgpt_hub/baseline/baseline.json | 227 +++++++++++++++++++------- dbgpt_hub/baseline/show_result.py | 176 ++++++++++++-------- dbgpt_hub/baseline/show_result_api.py | 34 ++-- 4 files changed, 280 insertions(+), 162 deletions(-) diff --git a/dbgpt_hub/baseline/__init__.py b/dbgpt_hub/baseline/__init__.py index 66b7ff1..569ec57 100644 --- a/dbgpt_hub/baseline/__init__.py +++ b/dbgpt_hub/baseline/__init__.py @@ -3,7 +3,6 @@ ============== """ -from .show_result_api import show_all -from .show_result_api import show_model +from .show_result_api import show_score, show_scores -__all__ = ["show_all", "show_model"] +__all__ = ["show_score", "show_scores"] diff --git a/dbgpt_hub/baseline/baseline.json b/dbgpt_hub/baseline/baseline.json index 9a30ae6..400d6f1 100644 --- a/dbgpt_hub/baseline/baseline.json +++ b/dbgpt_hub/baseline/baseline.json @@ -1,74 +1,175 @@ { - "spider": { - "llama2-7b-hf": { - "base": { - "alpaca":{ - "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", - "acc": { - "ex":{ - "easy": 0.1, - "medium": 0.1, - "hard": 0.1, - "extra": 0.1, - "all": 0.1 - }, - "em":{ - "easy": 0.1, - "medium": 0.1, - "hard": 0.1, - "extra": 0.1, - "all": 0.1 - } + "spider": { + "llama2-7b-hf": { + "base": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } - }, - "openai":{ - "instruction": "openai-instruction", - "acc": { - "ex":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - }, - "em":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - } + } + }, + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "lora":{ + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + } + }, + "qlora": { + "openai":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + } + }, + "ppo":{} + }, + "llama2-7b-chat-hf": { + "ppo": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } } }, - "lora": { - "alpaca":{ - "instruction": "test", - "acc": { - "ex":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - }, - "em":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - } + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + } + } + }, + "bird":{ + "llama2-7b-chat-hf": { + "ppo": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } } }, - "qlora": { - + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } } - , - "ppo":{} - }, - "llama2-7b-chat-hf": { } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py index d1e662e..3a30107 100644 --- a/dbgpt_hub/baseline/show_result.py +++ b/dbgpt_hub/baseline/show_result.py @@ -2,65 +2,108 @@ import sys import json from typing import Optional, Dict, Any -from prettytable import PrettyTable - +from prettytable.colortable import ColorTable, Theme ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(ROOT_PATH) + +MYTHEME = Theme( + default_color="96", # blue + vertical_color="31", # red + horizontal_color="33", # yellow + junction_color="97", # white +) +HEADER = [ + "dataset", + "model", + "method", + "prompt", + "etype", + "easy", + "medium", + "hard", + "extra", + "all", +] baseline_file = "./dbgpt_hub/baseline/baseline.json" -# read json + with open(baseline_file, "r") as file: baseline_json = json.load(file) -def print_models_info(dataset, model, method, prompt): - print_table_models = PrettyTable() - models_header = ["dataset", "model", "method", "prompt"] - models_info = [dataset, model, method, prompt] - print_table_models.field_names = models_header - print_table_models.add_rows([models_info]) - return print_table_models - - -def print_scores_info(acc_data): - print_table_scores = PrettyTable() - scores_header = ["etype", "easy", "medium", "hard", "extra", "all"] - print_table_scores.field_names = scores_header - eytpe = "ex" - ex_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] - ex_score.insert(0, eytpe) - eytpe = "em" - em_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] - em_score.insert(0, eytpe) - print_table_scores.add_rows([ex_score, em_score]) - return print_table_scores - - -def show_model(dataset, model, method, prompt): - # 1.get res - acc_data = baseline_json[dataset][model][method][prompt]["acc"] - - # 2.print models info - print_table_models = print_models_info(dataset, model, method, prompt) - print(print_table_models) - - # 3.print scores info - print_table_scores = print_scores_info(acc_data) - print(print_table_scores) - - -def show_model_api(args: Optional[Dict[str, Any]] = None): - dataset = args["dataset"] - model = args["model"] - method = args["method"] - prompt = args["prompt"] - - show_model(dataset, model, method, prompt) - - -def show_all(): +def print_color_table_score(acc_data, dataset, model, method, prompt): + model_data = [dataset, model, method, prompt] + print_table_scores = ColorTable(theme=MYTHEME) + print_table_scores.field_names = HEADER + model_ex = get_model_score(acc_data, "ex", model_data) + model_em = get_model_score(acc_data, "em", model_data) + print_table_scores.add_rows([model_em, model_ex]) + print(print_table_scores, "\n") + + +def table_add_row(table_scores, acc_data, dataset, model, method, prompt): + model_data = [dataset, model, method, prompt] + model_ex = get_model_score(acc_data, "ex", model_data) + model_em = get_model_score(acc_data, "em", model_data) + table_scores.add_rows([model_em, model_ex]) + return table_scores + + +def add_scores_to_table( + table, json_data, dataset, model=None, method=None, prompt=None +): + if model is None: + for model_key in json_data.keys(): + add_scores_to_table( + table, json_data[model_key], dataset, model_key, method, prompt + ) + elif method is None: + for method_key in json_data.keys(): + add_scores_to_table( + table, json_data[method_key], dataset, model, method_key, prompt + ) + elif prompt is None: + for prompt_key in json_data.keys(): + add_scores_to_table( + table, json_data[prompt_key], dataset, model, method, prompt_key + ) + else: + acc_data = json_data["acc"] + table_add_row(table, acc_data, dataset, model, method, prompt) + + +def show_score(dataset=None, model=None, method=None, prompt=None): + if dataset is None: + raise ValueError("dataset cannot be None!") + elif model is None: + json_data = baseline_json[dataset] + elif method is None: + json_data = baseline_json[dataset][model] + elif prompt is None: + json_data = baseline_json[dataset][model][method] + else: + json_data = baseline_json[dataset][model][method][prompt] + table_scores = ColorTable(theme=MYTHEME) + table_scores.field_names = HEADER + add_scores_to_table(table_scores, json_data, dataset, model, method, prompt) + print(table_scores) + + +def show_score_api(dataset=None, model=None, method=None, prompt=None): + show_score(dataset, model, method, prompt) + + +def get_model_score(acc_data, etype, model_data): + etype_score = [etype] + [acc_data[etype][key] for key in acc_data[etype].keys()] + model_score = model_data + etype_score + return model_score + + +def show_scores(): datasets = baseline_json.keys() + table_scores = ColorTable(theme=MYTHEME) + table_scores.field_names = HEADER for dataset in datasets: models = baseline_json[dataset].keys() for model in models: @@ -68,37 +111,26 @@ def show_all(): for method in methods: prompts = baseline_json[dataset][model][method].keys() for prompt in prompts: - # 1.get scores info acc_data = baseline_json[dataset][model][method][prompt]["acc"] - - # 2.print models info - print_table_models = print_models_info( - dataset, model, method, prompt + table_scores = table_add_row( + table_scores, acc_data, dataset, model, method, prompt ) - print(print_table_models) - - # 3.print scores info - print_table_scores = print_scores_info(acc_data) - print(print_table_scores) + print(table_scores, "\n") -def show_all_api(): - show_all() +def show_scores_api(): + show_scores() # def update(): -# # todo : 更新baseline.json +# # todo : update baseline.json # # if __name__ == "__main__": - # args - show_args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "method": "lora", - "prompt": "alpaca", - } - show_model(show_args) - - show_all() + # show_scores() + # show_score() # ValueError: dataset cannot be None! + # show_score(dataset="spider") + # show_score(dataset="spider", model="llama2-7b-hf") + # show_score(dataset="spider", model="llama2-7b-hf", method="base") + show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py index 18128da..a6fe80f 100644 --- a/dbgpt_hub/baseline/show_result_api.py +++ b/dbgpt_hub/baseline/show_result_api.py @@ -3,32 +3,18 @@ from dbgpt_hub.baseline import show_result -def show_all(): - show_result.show_all_api() +def show_scores(): + show_result.show_scores_api() -def show_model(args: Optional[Dict[str, Any]] = None): - # Arguments for show result - if args is None: - args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "sft": "lora", - "prompt": "alpaca", - } - else: - args = args - - show_result.show_model_api(args) +def show_score(dataset=None, model=None, method=None, prompt=None): + show_result.show_score_api(dataset, model, method, prompt) if __name__ == "__main__": - show_all() - - show_args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "method": "lora", - "prompt": "alpaca", - } - show_model(show_args) + # show_scores() + # show_score() # ValueError: dataset cannot be None! + # show_score(dataset="spider") + # show_score(dataset="spider", model="llama2-7b-hf") + # show_score(dataset="spider", model="llama2-7b-hf", method="base") + show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca")