From f91a02b6ded4591601a0ddfa30d08e32304f6bfa Mon Sep 17 00:00:00 2001 From: junewgl <45283002+junewgl@users.noreply.github.com> Date: Tue, 5 Dec 2023 12:14:22 +0800 Subject: [PATCH 1/3] feat: add API interface for model baseline result (#162) --- dbgpt_hub/baseline/__init__.py | 9 +++ dbgpt_hub/baseline/baseline.json | 74 ++++++++++++++++++ dbgpt_hub/baseline/show_result.py | 104 ++++++++++++++++++++++++++ dbgpt_hub/baseline/show_result_api.py | 34 +++++++++ 4 files changed, 221 insertions(+) create mode 100644 dbgpt_hub/baseline/__init__.py create mode 100644 dbgpt_hub/baseline/baseline.json create mode 100644 dbgpt_hub/baseline/show_result.py create mode 100644 dbgpt_hub/baseline/show_result_api.py diff --git a/dbgpt_hub/baseline/__init__.py b/dbgpt_hub/baseline/__init__.py new file mode 100644 index 0000000..66b7ff1 --- /dev/null +++ b/dbgpt_hub/baseline/__init__.py @@ -0,0 +1,9 @@ +""" +dbgpt_hub.baseline +============== +""" + +from .show_result_api import show_all +from .show_result_api import show_model + +__all__ = ["show_all", "show_model"] diff --git a/dbgpt_hub/baseline/baseline.json b/dbgpt_hub/baseline/baseline.json new file mode 100644 index 0000000..9a30ae6 --- /dev/null +++ b/dbgpt_hub/baseline/baseline.json @@ -0,0 +1,74 @@ +{ + "spider": { + "llama2-7b-hf": { + "base": { + "alpaca":{ + "instruction": "I want you to act as a SQL terminal in front of an example database, you need only to return the sql command to me.Below is an instruction that describes a task, Write a response that appropriately completes the request.\\n\\\"\\n##Instruction:\\ndepartment_management contains tables such as department, head, management. Table department has columns such as Department_ID, Name, Creation, Ranking, Budget_in_Billions, Num_Employees. Department_ID is the primary key.\\nTable head has columns such as head_ID, name, born_state, age. head_ID is the primary key.\\nTable management has columns such as department_ID, head_ID, temporary_acting. 
department_ID is the primary key.\\nThe head_ID of management is the foreign key of head_ID of head.\\nThe department_ID of management is the foreign key of Department_ID of department.\\n\\n", + "acc": { + "ex":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + }, + "em":{ + "easy": 0.1, + "medium": 0.1, + "hard": 0.1, + "extra": 0.1, + "all": 0.1 + } + } + }, + "openai":{ + "instruction": "openai-instruction", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "lora": { + "alpaca":{ + "instruction": "test", + "acc": { + "ex":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + }, + "em":{ + "easy": 0.887, + "medium": 0.711, + "hard": 0.575, + "extra": 0.380, + "all": 0.677 + } + } + } + }, + "qlora": { + + } + , + "ppo":{} + }, + "llama2-7b-chat-hf": { + } + } + } \ No newline at end of file diff --git a/dbgpt_hub/baseline/show_result.py b/dbgpt_hub/baseline/show_result.py new file mode 100644 index 0000000..d1e662e --- /dev/null +++ b/dbgpt_hub/baseline/show_result.py @@ -0,0 +1,104 @@ +import os +import sys +import json +from typing import Optional, Dict, Any +from prettytable import PrettyTable + + +ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(ROOT_PATH) + +baseline_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "baseline.json") +# read json; the path is resolved next to this module so the import works from any working directory +with open(baseline_file, "r", encoding="utf-8") as file: + baseline_json = json.load(file) + + +def print_models_info(dataset, model, method, prompt): + print_table_models = PrettyTable() + models_header = ["dataset", "model", "method", "prompt"] + models_info = [dataset, model, method, prompt] + print_table_models.field_names = models_header + print_table_models.add_rows([models_info]) + return print_table_models + + +def print_scores_info(acc_data): + 
print_table_scores = PrettyTable() + scores_header = ["etype", "easy", "medium", "hard", "extra", "all"] + print_table_scores.field_names = scores_header + etype = "ex" + ex_score = [acc_data[etype][key] for key in scores_header[1:]]  # read in header order so each value lands under its own column + ex_score.insert(0, etype) + etype = "em" + em_score = [acc_data[etype][key] for key in scores_header[1:]] + em_score.insert(0, etype) + print_table_scores.add_rows([ex_score, em_score]) + return print_table_scores + + +def show_model(dataset, model, method, prompt): + # 1.get res + acc_data = baseline_json[dataset][model][method][prompt]["acc"] + + # 2.print models info + print_table_models = print_models_info(dataset, model, method, prompt) + print(print_table_models) + + # 3.print scores info + print_table_scores = print_scores_info(acc_data) + print(print_table_scores) + + +def show_model_api(args: Optional[Dict[str, Any]] = None): + dataset = args["dataset"] + model = args["model"] + method = args["method"] + prompt = args["prompt"] + + show_model(dataset, model, method, prompt) + + +def show_all(): + datasets = baseline_json.keys() + for dataset in datasets: + models = baseline_json[dataset].keys() + for model in models: + methods = baseline_json[dataset][model].keys() + for method in methods: + prompts = baseline_json[dataset][model][method].keys() + for prompt in prompts: + # 1.get scores info + acc_data = baseline_json[dataset][model][method][prompt]["acc"] + + # 2.print models info + print_table_models = print_models_info( + dataset, model, method, prompt + ) + print(print_table_models) + + # 3.print scores info + print_table_scores = print_scores_info(acc_data) + print(print_table_scores) + + +def show_all_api(): + show_all() + + +# def update(): +# # todo : update baseline.json +# # + + +if __name__ == "__main__": + # args + show_args = { + "dataset": "spider", + "model": "llama2-7b-hf", + "method": "lora", + "prompt": "alpaca", + } + show_model_api(show_args)  # show_model() takes four positional arguments, not a dict + + show_all() diff --git 
a/dbgpt_hub/baseline/show_result_api.py b/dbgpt_hub/baseline/show_result_api.py new file mode 100644 index 0000000..18128da --- /dev/null +++ b/dbgpt_hub/baseline/show_result_api.py @@ -0,0 +1,34 @@ +from typing import Optional, Dict, Any + +from dbgpt_hub.baseline import show_result + + +def show_all(): + show_result.show_all_api() + + +def show_model(args: Optional[Dict[str, Any]] = None): + # Default arguments; the keys must match those read by show_result.show_model_api + if args is None: + args = { + "dataset": "spider", + "model": "llama2-7b-hf", + "method": "lora", + "prompt": "alpaca", + } + else: + args = args + + show_result.show_model_api(args) + + +if __name__ == "__main__": + show_all() + + show_args = { + "dataset": "spider", + "model": "llama2-7b-hf", + "method": "lora", + "prompt": "alpaca", + } + show_model(show_args) From d47cef34b123169d28cb5fe0331d8252e5758536 Mon Sep 17 00:00:00 2001 From: Danrui Qi Date: Tue, 5 Dec 2023 14:08:28 +0800 Subject: [PATCH 2/3] Update README.md (#165) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cef9530..0b8dcaa 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,7 @@ git clone https://github.com/eosphoros-ai/DB-GPT-Hub.git cd DB-GPT-Hub conda create -n dbgpt_hub python=3.10 conda activate dbgpt_hub -conda install -c conda-forge poetry>=1.4.0 +pip install poetry poetry install ``` From 96aaabd67f90196e6bcc758cbba207bfb0e32218 Mon Sep 17 00:00:00 2001 From: Danrui Qi Date: Tue, 5 Dec 2023 14:08:55 +0800 Subject: [PATCH 3/3] Update README.zh.md (#164) --- README.zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.zh.md b/README.zh.md index dc1ada7..f29666f 100644 --- a/README.zh.md +++ b/README.zh.md @@ -106,7 +106,7 @@ git clone https://github.com/eosphoros-ai/DB-GPT-Hub.git cd DB-GPT-Hub conda create -n dbgpt_hub python=3.10 conda activate dbgpt_hub -conda install -c conda-forge poetry>=1.4.0 +pip install poetry poetry install ```