Add API interfaces for train, predict and evaluate processes #160

qidanrui · 2023-12-02T10:53:23Z

First, install dbgpt_hub with the following command:

pip install dbgpt_hub

Then, set up the arguments and run the whole process.

from dbgpt_hub.data_process import preprocess_sft_data
from dbgpt_hub.train import start_sft
from dbgpt_hub.predict import start_predict
from dbgpt_hub.eval import start_evaluate

# Folder passed to preprocess_sft_data as `data_folder`.
data_folder = "dbgpt_hub/data"

# One entry per dataset to preprocess; this example describes a single
# "spider" data source.
data_info = [
    {
        "data_source": "spider",
        # Input files for the train and dev splits.
        "train_file": ["train_spider.json", "train_others.json"],
        "dev_file": ["dev.json"],
        "tables_file": "tables.json",
        "db_id_name": "db_id",
        "is_multiple_turn": False,
        # File names for the preprocessed train/dev output.
        "train_output": "spider_train.json",
        "dev_output": "spider_dev.json",
    },
]

# Arguments handed to start_sft() for the fine-tuning run.
train_args = {
    # Base model, training dataset and prompt/sequence settings.
    "model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf",
    "do_train": True,
    "dataset": "example_text2sql_train",
    "max_source_length": 2048,
    "max_target_length": 512,
    # LoRA fine-tuning configuration.
    "finetuning_type": "lora",
    "lora_target": "q_proj,v_proj",
    "template": "llama2",
    "lora_rank": 64,
    "lora_alpha": 32,
    # Adapter output location; cache and output directory are overwritten.
    "output_dir": "dbgpt_hub/output/adapter/CodeLlama-13b-sql-lora",
    "overwrite_cache": True,
    "overwrite_output_dir": True,
    # Batching, schedule, logging and checkpoint cadence.
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "lr_scheduler_type": "cosine_with_restarts",
    "logging_steps": 50,
    "save_steps": 2000,
    "learning_rate": 2e-4,
    "num_train_epochs": 8,
    "plot_loss": True,
    # NOTE(review): bf16 presumably requires hardware with bfloat16
    # support - confirm for the target GPUs.
    "bf16": True,
}

# Arguments handed to start_predict().
predict_args = {
    # Base model and prompt template.
    "model_name_or_path": "codellama/CodeLlama-13b-Instruct-hf",
    "template": "llama2",
    # LoRA checkpoint directory to load.
    "finetuning_type": "lora",
    "checkpoint_dir": "dbgpt_hub/output/adapter/CodeLlama-13b-sql-lora",
    # Input file to predict on, and where the predictions are written.
    "predict_file_path": "dbgpt_hub/data/eval_data/dev_sql.json",
    "predict_out_dir": "dbgpt_hub/output/",
    "predicted_out_filename": "pred_sql.sql",
}

# Arguments handed to start_evaluate().
evaluate_args = {
    # Predicted SQL file to score.
    # NOTE(review): this path differs from predict_out_dir +
    # predicted_out_filename in predict_args - confirm it points at the file
    # the predict step actually writes.
    "input": "./dbgpt_hub/output/pred/pred_sql_dev_skeleton.sql",
    # Gold files, databases and table schemas.
    "gold": "./dbgpt_hub/data/eval_data/gold.txt",
    "gold_natsql": "./dbgpt_hub/data/eval_data/gold_natsql2sql.txt",
    "db": "./dbgpt_hub/data/spider/database",
    "table": "./dbgpt_hub/data/eval_data/tables.json",
    "table_natsql": "./dbgpt_hub/data/eval_data/tables_for_natsql2sql.json",
    # Evaluation mode and switches.
    "etype": "exec",
    "plug_value": True,
    # NOTE(review): key spelling ("distict") is kept exactly as-is; the
    # evaluator presumably reads this exact key - confirm before renaming.
    "keep_distict": False,
    "progress_bar_for_each_datapoint": False,
    "natsql": False,
}

# Step 1: preprocess the datasets described by data_info under data_folder.
preprocess_sft_data(data_folder=data_folder, data_info=data_info)

# Steps 2-4: fine-tune, predict, then evaluate, each driven by its own
# argument dict.
start_sft(train_args)
start_predict(predict_args)
start_evaluate(evaluate_args)

…d-workflow

wangzaistone

great!

csunny

Awesome, LGTM 🚀

qidanrui added 6 commits November 30, 2023 07:43

remove pygraphviz in poetry

35e5e78

Merge branch 'main' of https://github.com/qidanrui/DB-GPT-Hub into ad…

6b1780d

…d-workflow

update bitsandbytes to 0.41.2

8359ba8

handle invalid output from models

b35533e

Merge branch 'main' of https://github.com/qidanrui/DB-GPT-Hub into ad…

55e7a0c

…d-workflow

add API interfaces for train, predict and evaluate

b17e6bd

wangzaistone reviewed Dec 2, 2023

View reviewed changes

csunny approved these changes Dec 4, 2023

View reviewed changes

csunny merged commit 961287b into eosphoros-ai:main Dec 4, 2023
4 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add API interfaces for train, predict and evaluate processes #160

Add API interfaces for train, predict and evaluate processes #160

qidanrui commented Dec 2, 2023

wangzaistone left a comment

csunny left a comment

Add API interfaces for train, predict and evaluate processes #160

Add API interfaces for train, predict and evaluate processes #160

Conversation

qidanrui commented Dec 2, 2023

wangzaistone left a comment

Choose a reason for hiding this comment

csunny left a comment

Choose a reason for hiding this comment