From 2992798e95e352f54d1185760514e9514b455bb6 Mon Sep 17 00:00:00 2001 From: junewgl <1965259211@qq.com> Date: Mon, 4 Dec 2023 20:57:49 +0800 Subject: [PATCH 1/5] feat: add API interface for model baseline result --- dbgpt_hub/baseline/__init__.py | 9 ++ dbgpt_hub/baseline/baseline.json | 74 ++++++++++++++++ dbgpt_hub/baseline/show_result.py | 118 ++++++++++++++++++++++++++ dbgpt_hub/baseline/show_result_api.py | 36 ++++++++ 4 files changed, 237 insertions(+) create mode 100644 dbgpt_hub/baseline/__init__.py create mode 100644 dbgpt_hub/baseline/baseline.json create mode 100644 dbgpt_hub/baseline/show_result.py create mode 100644 dbgpt_hub/baseline/show_result_api.py diff --git a/dbgpt_hub/baseline/__init__.py b/dbgpt_hub/baseline/__init__.py new file mode 100644 index 0000000..66b7ff1 --- /dev/null +++ b/dbgpt_hub/baseline/__init__.py @@ -0,0 +1,9 @@ +""" +dbgpt_hub.baseline +============== +""" + +from .show_result_api import show_all +from .show_result_api import show_model + +__all__ = ["show_all", "show_model"] diff --git a/dbgpt_hub/baseline/baseline.json b/dbgpt_hub/baseline/baseline.json new file mode 100644 index 0000000..9a30ae6 --- /dev/null +++ b/dbgpt_hub/baseline/baseline.json @@ -0,0 +1,74 @@ +{ + "spider": { + "llama2-7b-hf": { + "base": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + }, + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "lora": { + "alpaca":{ + "instruction": "test", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "qlora": { + + } + , + "ppo":{} + }, + "llama2-7b-chat-hf": { + } + } + } \ No newline at end of file diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py new file mode 100644 index 0000000..30837c3 --- /dev/null +++ b/dbgpt_hub/baseline/show_result.py @@ -0,0 +1,118 @@ +import os +import sys +import json +from typing import Optional, Dict, Any +from prettytable import PrettyTable + + +ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(ROOT_PATH) + +baseline_file = "./dbgpt_hub/baseline/baseline.json" +# read json +with open(baseline_file, 'r') as file: + baseline_json = json.load(file) + +def print_models_info(dataset, model, method, prompt): + print_table_models = PrettyTable() + models_header = ['dataset', 'model', 'method', 'prompt'] + models_info = [dataset, model, method, prompt] + print_table_models.field_names = models_header + print_table_models.add_rows([models_info]) + return print_table_models + + +def print_scores_info(acc_data): + print_table_scores = PrettyTable() + scores_header = ['etype', 'easy', 'medium', 'hard', 'extra', 'all'] + print_table_scores.field_names = scores_header + eytpe = "ex" + ex_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] + ex_score.insert(0, eytpe) + eytpe = "em" + em_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] + em_score.insert(0, eytpe) + print_table_scores.add_rows( + [ + ex_score, + em_score + ] + ) + return print_table_scores + +def show_model( + dataset, + model, + method, + prompt + ): + + # 1.get res + acc_data = baseline_json[dataset][model][method][prompt]['acc'] + + # 2.print models info + print_table_models = print_models_info(dataset, model, method, prompt) + print(print_table_models) + + # 3.print scores info + print_table_scores = print_scores_info(acc_data) + print(print_table_scores) + +def show_model_api(args: Optional[Dict[str, Any]] = None): + dataset = args["dataset"] + model = args["model"] + method = args["method"] + prompt = args["prompt"] + + show_model( + dataset, + model, + method, + prompt + ) + +def show_all(): + datasets = baseline_json.keys() + for dataset in datasets: + models = baseline_json[dataset].keys() + for model in models: + methods = baseline_json[dataset][model].keys() + for method in methods: + prompts = baseline_json[dataset][model][method].keys() + for prompt in prompts: + # 1.get scores info + acc_data = baseline_json[dataset][model][method][prompt]['acc'] + + # 2.print models info + print_table_models = print_models_info(dataset, model, method, prompt) + print(print_table_models) + + # 3.print scores info + print_table_scores = print_scores_info(acc_data) + print(print_table_scores) + + + + +def show_all_api(): + show_all() + + + +# def update(): +# # todo : 更新baseline.json +# # + + +if __name__ == "__main__": + # args + show_args = { + "dataset" : "spider", + "model" : "llama2-7b-hf", + "method" : "lora", + "prompt" : "alpaca", + } + show_model(show_args) + + show_all() + diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py new file mode 100644 index 0000000..ea62864 --- /dev/null +++ b/dbgpt_hub/baseline/show_result_api.py @@ -0,0 +1,36 @@ +from typing import Optional, Dict, Any + +from dbgpt_hub.baseline import show_result + + +def show_all(): + show_result.show_all_api() + +def show_model( + args: Optional[Dict[str, Any]] = None +): + # Arguments for show result + if args is None: + args = { + "dataset":"spider", + "model":"llama2-7b-hf", + "sft":"lora", + "prompt":"alpaca", + } + else: + args = args + + show_result.show_model_api(args) + + + +if __name__ == "__main__": + show_all() + + show_args = { + "dataset" : "spider", + "model" : "llama2-7b-hf", + "method" : "lora", + "prompt" : "alpaca" + } + show_model(show_args) From b9b087cbc7298fe0fe46adcdc3e52c8c5d076192 Mon Sep 17 00:00:00 2001 From: junewgl <1965259211@qq.com> Date: Tue, 5 Dec 2023 11:36:29 +0800 Subject: [PATCH 2/5] style: run black for model baseline result --- dbgpt_hub/baseline/show_result.py | 56 ++++++++++----------------- dbgpt_hub/baseline/show_result_api.py | 24 ++++++------ 2 files changed, 32 insertions(+), 48 deletions(-) diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py index 30837c3..d1e662e 100644 --- a/dbgpt_hub/baseline/show_result.py +++ b/dbgpt_hub/baseline/show_result.py @@ -10,12 +10,13 @@ baseline_file = "./dbgpt_hub/baseline/baseline.json" # read json -with open(baseline_file, 'r') as file: +with open(baseline_file, "r") as file: baseline_json = json.load(file) + def print_models_info(dataset, model, method, prompt): print_table_models = PrettyTable() - models_header = ['dataset', 'model', 'method', 'prompt'] + models_header = ["dataset", "model", "method", "prompt"] models_info = [dataset, model, method, prompt] print_table_models.field_names = models_header print_table_models.add_rows([models_info]) @@ -24,7 +25,7 @@ def print_models_info(dataset, model, method, prompt): def print_scores_info(acc_data): print_table_scores = PrettyTable() - scores_header = ['etype', 'easy', 'medium', 'hard', 'extra', 'all'] + scores_header = ["etype", "easy", "medium", "hard", "extra", "all"] print_table_scores.field_names = scores_header eytpe = "ex" ex_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] @@ -32,23 +33,13 @@ def print_scores_info(acc_data): eytpe = "em" em_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] em_score.insert(0, eytpe) - print_table_scores.add_rows( - [ - ex_score, - em_score - ] - ) + print_table_scores.add_rows([ex_score, em_score]) return print_table_scores -def show_model( - dataset, - model, - method, - prompt - ): +def show_model(dataset, model, method, prompt): # 1.get res - acc_data = baseline_json[dataset][model][method][prompt]['acc'] + acc_data = baseline_json[dataset][model][method][prompt]["acc"] # 2.print models info print_table_models = print_models_info(dataset, model, method, prompt) @@ -58,18 +49,15 @@ def show_model( print_table_scores = print_scores_info(acc_data) print(print_table_scores) + def show_model_api(args: Optional[Dict[str, Any]] = None): dataset = args["dataset"] model = args["model"] method = args["method"] prompt = args["prompt"] - show_model( - dataset, - model, - method, - prompt - ) + show_model(dataset, model, method, prompt) + def show_all(): datasets = baseline_json.keys() @@ -81,10 +69,12 @@ def show_all(): prompts = baseline_json[dataset][model][method].keys() for prompt in prompts: # 1.get scores info - acc_data = baseline_json[dataset][model][method][prompt]['acc'] + acc_data = baseline_json[dataset][model][method][prompt]["acc"] # 2.print models info - print_table_models = print_models_info(dataset, model, method, prompt) + print_table_models = print_models_info( + dataset, model, method, prompt + ) print(print_table_models) # 3.print scores info @@ -92,27 +82,23 @@ def show_all(): print(print_table_scores) - - def show_all_api(): - show_all() - + show_all() # def update(): # # todo : 更新baseline.json -# # +# # if __name__ == "__main__": # args show_args = { - "dataset" : "spider", - "model" : "llama2-7b-hf", - "method" : "lora", - "prompt" : "alpaca", + "dataset": "spider", + "model": "llama2-7b-hf", + "method": "lora", + "prompt": "alpaca", } show_model(show_args) - + show_all() - diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py index ea62864..18128da 100644 --- a/dbgpt_hub/baseline/show_result_api.py +++ b/dbgpt_hub/baseline/show_result_api.py @@ -6,16 +6,15 @@ def show_all(): show_result.show_all_api() -def show_model( - args: Optional[Dict[str, Any]] = None -): - # Arguments for show result + +def show_model(args: Optional[Dict[str, Any]] = None): + # Arguments for show result if args is None: args = { - "dataset":"spider", - "model":"llama2-7b-hf", - "sft":"lora", - "prompt":"alpaca", + "dataset": "spider", + "model": "llama2-7b-hf", + "sft": "lora", + "prompt": "alpaca", } else: args = args @@ -23,14 +22,13 @@ def show_model( show_result.show_model_api(args) - if __name__ == "__main__": show_all() show_args = { - "dataset" : "spider", - "model" : "llama2-7b-hf", - "method" : "lora", - "prompt" : "alpaca" + "dataset": "spider", + "model": "llama2-7b-hf", + "method": "lora", + "prompt": "alpaca", } show_model(show_args) From cb830964c9e2200395bf2a22b265e68039a26de4 Mon Sep 17 00:00:00 2001 From: junewgl <1965259211@qq.com> Date: Wed, 6 Dec 2023 10:56:04 +0800 Subject: [PATCH 3/5] perf: output colored table and update model baseline API --- dbgpt_hub/baseline/__init__.py | 5 +- dbgpt_hub/baseline/baseline.json | 227 +++++++++++++++++++------- dbgpt_hub/baseline/show_result.py | 176 ++++++++++++-------- dbgpt_hub/baseline/show_result_api.py | 34 ++-- 4 files changed, 280 insertions(+), 162 deletions(-) diff --git a/dbgpt_hub/baseline/__init__.py b/dbgpt_hub/baseline/__init__.py index 66b7ff1..569ec57 100644 --- a/dbgpt_hub/baseline/__init__.py +++ b/dbgpt_hub/baseline/__init__.py @@ -3,7 +3,6 @@ ============== """ -from .show_result_api import show_all -from .show_result_api import show_model +from .show_result_api import show_score, show_scores -__all__ = ["show_all", "show_model"] +__all__ = ["show_score", "show_scores"] diff --git a/dbgpt_hub/baseline/baseline.json b/dbgpt_hub/baseline/baseline.json index 9a30ae6..400d6f1 100644 --- a/dbgpt_hub/baseline/baseline.json +++ b/dbgpt_hub/baseline/baseline.json @@ -1,74 +1,175 @@ { - "spider": { - "llama2-7b-hf": { - "base": { - "alpaca":{ - "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", - "acc": { - "ex":{ - "easy": 0.1, - "medium": 0.1, - "hard": 0.1, - "extra": 0.1, - "all": 0.1 - }, - "em":{ - "easy": 0.1, - "medium": 0.1, - "hard": 0.1, - "extra": 0.1, - "all": 0.1 - } + "spider": { + "llama2-7b-hf": { + "base": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } - }, - "openai":{ - "instruction": "openai-instruction", - "acc": { - "ex":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - }, - "em":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - } + } + }, + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "lora":{ + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + } + }, + "qlora": { + "openai":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + } + }, + "ppo":{} + }, + "llama2-7b-chat-hf": { + "ppo": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } } }, - "lora": { - "alpaca":{ - "instruction": "test", - "acc": { - "ex":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - }, - "em":{ - "easy": 0.887, - "medium": 0.711, - "hard": 0.575, - "extra": 0.380, - "all": 0.677 - } + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + } + } + }, + "bird":{ + "llama2-7b-chat-hf": { + "ppo": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 } } }, - "qlora": { - + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } } - , - "ppo":{} - }, - "llama2-7b-chat-hf": { } } - } \ No newline at end of file + } +} \ No newline at end of file diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py index d1e662e..3a30107 100644 --- a/dbgpt_hub/baseline/show_result.py +++ b/dbgpt_hub/baseline/show_result.py @@ -2,65 +2,108 @@ import sys import json from typing import Optional, Dict, Any -from prettytable import PrettyTable - +from prettytable.colortable import ColorTable, Theme ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(ROOT_PATH) + +MYTHEME = Theme( + default_color="96", # blue + vertical_color="31", # red + horizontal_color="33", # yellow + junction_color="97", # white +) +HEADER = [ + "dataset", + "model", + "method", + "prompt", + "etype", + "easy", + "medium", + "hard", + "extra", + "all", +] baseline_file = "./dbgpt_hub/baseline/baseline.json" -# read json + with open(baseline_file, "r") as file: baseline_json = json.load(file) -def print_models_info(dataset, model, method, prompt): - print_table_models = PrettyTable() - models_header = ["dataset", "model", "method", "prompt"] - models_info = [dataset, model, method, prompt] - print_table_models.field_names = models_header - print_table_models.add_rows([models_info]) - return print_table_models - - -def print_scores_info(acc_data): - print_table_scores = PrettyTable() - scores_header = ["etype", "easy", "medium", "hard", "extra", "all"] - print_table_scores.field_names = scores_header - eytpe = "ex" - ex_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] - ex_score.insert(0, eytpe) - eytpe = "em" - em_score = [acc_data[eytpe][key] for key in acc_data[eytpe].keys()] - em_score.insert(0, eytpe) - print_table_scores.add_rows([ex_score, em_score]) - return print_table_scores - - -def show_model(dataset, model, method, prompt): - # 1.get res - acc_data = baseline_json[dataset][model][method][prompt]["acc"] - - # 2.print models info - print_table_models = print_models_info(dataset, model, method, prompt) - print(print_table_models) - - # 3.print scores info - print_table_scores = print_scores_info(acc_data) - print(print_table_scores) - - -def show_model_api(args: Optional[Dict[str, Any]] = None): - dataset = args["dataset"] - model = args["model"] - method = args["method"] - prompt = args["prompt"] - - show_model(dataset, model, method, prompt) - - -def show_all(): +def print_color_table_score(acc_data, dataset, model, method, prompt): + model_data = [dataset, model, method, prompt] + print_table_scores = ColorTable(theme=MYTHEME) + print_table_scores.field_names = HEADER + model_ex = get_model_score(acc_data, "ex", model_data) + model_em = get_model_score(acc_data, "em", model_data) + print_table_scores.add_rows([model_em, model_ex]) + print(print_table_scores, "\n") + + +def table_add_row(table_scores, acc_data, dataset, model, method, prompt): + model_data = [dataset, model, method, prompt] + model_ex = get_model_score(acc_data, "ex", model_data) + model_em = get_model_score(acc_data, "em", model_data) + table_scores.add_rows([model_em, model_ex]) + return table_scores + + +def add_scores_to_table( + table, json_data, dataset, model=None, method=None, prompt=None +): + if model is None: + for model_key in json_data.keys(): + add_scores_to_table( + table, json_data[model_key], dataset, model_key, method, prompt + ) + elif method is None: + for method_key in json_data.keys(): + add_scores_to_table( + table, json_data[method_key], dataset, model, method_key, prompt + ) + elif prompt is None: + for prompt_key in json_data.keys(): + add_scores_to_table( + table, json_data[prompt_key], dataset, model, method, prompt_key + ) + else: + acc_data = json_data["acc"] + table_add_row(table, acc_data, dataset, model, method, prompt) + + +def show_score(dataset=None, model=None, method=None, prompt=None): + if dataset is None: + raise ValueError("dataset cannot be None!") + elif model is None: + json_data = baseline_json[dataset] + elif method is None: + json_data = baseline_json[dataset][model] + elif prompt is None: + json_data = baseline_json[dataset][model][method] + else: + json_data = baseline_json[dataset][model][method][prompt] + table_scores = ColorTable(theme=MYTHEME) + table_scores.field_names = HEADER + add_scores_to_table(table_scores, json_data, dataset, model, method, prompt) + print(table_scores) + + +def show_score_api(dataset=None, model=None, method=None, prompt=None): + show_score(dataset, model, method, prompt) + + +def get_model_score(acc_data, etype, model_data): + etype_score = [etype] + [acc_data[etype][key] for key in acc_data[etype].keys()] + model_score = model_data + etype_score + return model_score + + +def show_scores(): datasets = baseline_json.keys() + table_scores = ColorTable(theme=MYTHEME) + table_scores.field_names = HEADER for dataset in datasets: models = baseline_json[dataset].keys() for model in models: @@ -68,37 +111,26 @@ def show_all(): for method in methods: prompts = baseline_json[dataset][model][method].keys() for prompt in prompts: - # 1.get scores info acc_data = baseline_json[dataset][model][method][prompt]["acc"] - - # 2.print models info - print_table_models = print_models_info( - dataset, model, method, prompt + table_scores = table_add_row( + table_scores, acc_data, dataset, model, method, prompt ) - print(print_table_models) - - # 3.print scores info - print_table_scores = print_scores_info(acc_data) - print(print_table_scores) + print(table_scores, "\n") -def show_all_api(): - show_all() +def show_scores_api(): + show_scores() # def update(): -# # todo : 更新baseline.json +# # todo : update baseline.json # # if __name__ == "__main__": - # args - show_args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "method": "lora", - "prompt": "alpaca", - } - show_model(show_args) - - show_all() + # show_scores() + # show_score() # ValueError: dataset cannot be None! + # show_score(dataset="spider") + # show_score(dataset="spider", model="llama2-7b-hf") + # show_score(dataset="spider", model="llama2-7b-hf", method="base") + show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py index 18128da..a6fe80f 100644 --- a/dbgpt_hub/baseline/show_result_api.py +++ b/dbgpt_hub/baseline/show_result_api.py @@ -3,32 +3,18 @@ from dbgpt_hub.baseline import show_result -def show_all(): - show_result.show_all_api() +def show_scores(): + show_result.show_scores_api() -def show_model(args: Optional[Dict[str, Any]] = None): - # Arguments for show result - if args is None: - args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "sft": "lora", - "prompt": "alpaca", - } - else: - args = args - - show_result.show_model_api(args) +def show_score(dataset=None, model=None, method=None, prompt=None): + show_result.show_score_api(dataset, model, method, prompt) if __name__ == "__main__": - show_all() - - show_args = { - "dataset": "spider", - "model": "llama2-7b-hf", - "method": "lora", - "prompt": "alpaca", - } - show_model(show_args) + # show_scores() + # show_score() # ValueError: dataset cannot be None! + # show_score(dataset="spider") + # show_score(dataset="spider", model="llama2-7b-hf") + # show_score(dataset="spider", model="llama2-7b-hf", method="base") + show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") From 04d8785f991c2a73539708d6c60275b7e7163eb2 Mon Sep 17 00:00:00 2001 From: junewgl <1965259211@qq.com> Date: Wed, 6 Dec 2023 18:05:51 +0800 Subject: [PATCH 4/5] perf: update annotations --- dbgpt_hub/baseline/show_result.py | 43 ++++++++++++++++++++------- dbgpt_hub/baseline/show_result_api.py | 5 ---- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py index 3a30107..a79236d 100644 --- a/dbgpt_hub/baseline/show_result.py +++ b/dbgpt_hub/baseline/show_result.py @@ -74,6 +74,24 @@ def add_scores_to_table( def show_score(dataset=None, model=None, method=None, prompt=None): + """ + Displays the model baseline score information for a given dataset, model, method and prompt. + + Args: + dataset (str, optional): The dataset to be used for scoring. + model (str, optional): The model to be scored on the dataset. + method (str, optional): The training method to us. + prompt (str, optional): Additional information or context prompt. + + Returns: + model baseline score. + + + Examples + >>> from dbgpt_hub.baseline import show_score + >>> show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") + + """ if dataset is None: raise ValueError("dataset cannot be None!") elif model is None: @@ -101,6 +119,21 @@ def get_model_score(acc_data, etype, model_data): def show_scores(): + """ + Displays baseline score information for all models. + + Args: + None + + Returns: + model baseline score. + + + Examples + >>> from dbgpt_hub.baseline import show_scores + >>> show_scores() + + """ datasets = baseline_json.keys() table_scores = ColorTable(theme=MYTHEME) table_scores.field_names = HEADER @@ -122,15 +155,5 @@ def show_scores_api(): show_scores() -# def update(): -# # todo : update baseline.json -# # - - if __name__ == "__main__": - # show_scores() - # show_score() # ValueError: dataset cannot be None! - # show_score(dataset="spider") - # show_score(dataset="spider", model="llama2-7b-hf") - # show_score(dataset="spider", model="llama2-7b-hf", method="base") show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py index a6fe80f..ec2db96 100644 --- a/dbgpt_hub/baseline/show_result_api.py +++ b/dbgpt_hub/baseline/show_result_api.py @@ -12,9 +12,4 @@ def show_score(dataset=None, model=None, method=None, prompt=None): if __name__ == "__main__": - # show_scores() - # show_score() # ValueError: dataset cannot be None! - # show_score(dataset="spider") - # show_score(dataset="spider", model="llama2-7b-hf") - # show_score(dataset="spider", model="llama2-7b-hf", method="base") show_score(dataset="spider", model="llama2-7b-hf", method="base", prompt="alpaca") From 3a89e565238ac177516ab91b17a714048ed0161f Mon Sep 17 00:00:00 2001 From: junewgl <1965259211@qq.com> Date: Wed, 6 Dec 2023 18:19:08 +0800 Subject: [PATCH 5/5] fix: merge conflict bug --- dbgpt_hub/baseline/show_result.py | 2 -- dbgpt_hub/baseline/show_result_api.py | 4 ---- 2 files changed, 6 deletions(-) diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py index f80853a..bcc2046 100644 --- a/dbgpt_hub/baseline/show_result.py +++ b/dbgpt_hub/baseline/show_result.py @@ -168,8 +168,6 @@ def show_scores(): prompts = baseline_json[dataset][model][method].keys() for prompt in prompts: acc_data = baseline_json[dataset][model][method][prompt]["acc"] - table_scores = table_add_row( - table_scores, acc_data, dataset, model, method, prompt table_scores = table_add_row( table_scores, acc_data, dataset, model, method, prompt ) diff --git a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py index 5b955a7..ec2db96 100644 --- a/dbgpt_hub/baseline/show_result_api.py +++ b/dbgpt_hub/baseline/show_result_api.py @@ -3,14 +3,10 @@ from dbgpt_hub.baseline import show_result -def show_scores(): - show_result.show_scores_api() def show_scores(): show_result.show_scores_api() -def show_score(dataset=None, model=None, method=None, prompt=None): - show_result.show_score_api(dataset, model, method, prompt) def show_score(dataset=None, model=None, method=None, prompt=None): show_result.show_score_api(dataset, model, method, prompt)