diff --git a/devtools/dev_sharegpt.yml b/devtools/dev_chat_template.yml
similarity index 92%
rename from devtools/dev_sharegpt.yml
rename to devtools/dev_chat_template.yml
index 9c65b49dcd..9697da4b33 100644
--- a/devtools/dev_sharegpt.yml
+++ b/devtools/dev_chat_template.yml
@@ -7,8 +7,8 @@ load_in_8bit: true
 load_in_4bit: false
 datasets:
-  - path: philschmid/guanaco-sharegpt-style
-    type: sharegpt
+  - path: fozziethebeat/alpaca_messages_2k_test
+    type: chat_template
     shards: 10
 val_set_size: 0
 output_dir: temp_debug/axolotl_outputs/model
diff --git a/docs/debugging.qmd b/docs/debugging.qmd
index 1d0779b073..029549d85b 100644
--- a/docs/debugging.qmd
+++ b/docs/debugging.qmd
@@ -51,12 +51,12 @@ While debugging it's helpful to simplify your test scenario as much as possible.
 
 ### Background
 
-The below example shows how to configure VSCode to debug data preprocessing of the `sharegpt` format. This is the format used when you have the following in your axolotl config:
+The below example shows how to configure VSCode to debug data preprocessing of the `chat_template` format. This is the format used when you have the following in your axolotl config:
 
 ```yaml
 datasets:
-  - path: # example on HF Hub: philschmid/guanaco-sharegpt-style
-    type: sharegpt
+  - path: # example on HF Hub: fozziethebeat/alpaca_messages_2k_test
+    type: chat_template
 ```
 
 >[!Important]
@@ -83,7 +83,7 @@ If you developing on a remote host, you can easily use VSCode to debug remotely.
 
 The easiest way to get started is to modify the [.vscode/launch.json](../.vscode/launch.json) file in this project. This is just an example configuration, so you may need to modify or copy it to suit your needs.
 
-For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 accelerate launch -m axolotl.cli.train dev_sharegpt.yml`, you would use the below configuration[^1]. Note that we add additional flags that override the axolotl config and incorporate the tips above (see the comments). We also set the working directory to `devtools` and set the `env` variable `HF_HOME` to a temporary folder that is later partially deleted. This is because we want to delete the HF dataset cache before each run in order to ensure that the data preprocessing code is run from scratch.
+For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 accelerate launch -m axolotl.cli.train dev_chat_template.yml`, you would use the below configuration[^1]. Note that we add additional flags that override the axolotl config and incorporate the tips above (see the comments). We also set the working directory to `devtools` and set the `env` variable `HF_HOME` to a temporary folder that is later partially deleted. This is because we want to delete the HF dataset cache before each run in order to ensure that the data preprocessing code is run from scratch.
 
 ```jsonc
 // .vscode/launch.json
@@ -91,12 +91,12 @@ For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 acceler
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Debug axolotl prompt - sharegpt",
+            "name": "Debug axolotl prompt - chat_template",
            "type": "python",
             "module": "accelerate.commands.launch",
             "request": "launch",
             "args": [
-                "-m", "axolotl.cli.train", "dev_sharegpt.yml",
+                "-m", "axolotl.cli.train", "dev_chat_template.yml",
                 // The flags below simplify debugging by overriding the axolotl config
                 // with the debugging tips above. Modify as needed.
"--dataset_processes=1", // limits data preprocessing to one process @@ -240,6 +240,6 @@ style="border-radius: 10px; display: block; margin: auto;" width="560" height="3
 
 
-[^1]: The config actually mimics the command `CUDA_VISIBLE_DEVICES=0 python -m accelerate.commands.launch -m axolotl.cli.train devtools/sharegpt.yml`, but this is the same thing.
+[^1]: The config actually mimics the command `CUDA_VISIBLE_DEVICES=0 python -m accelerate.commands.launch -m axolotl.cli.train devtools/chat_template.yml`, but this is the same thing.
 
 [^2]: Many of the below flags are recommended best practices by Nvidia when using nvidia-container-toolkit. You can read more about these flags [here](https://docs.nvidia.com/deeplearning/frameworks/user-guide/index.html).
diff --git a/examples/deepseek-v2/qlora-fsdp-2_5.yaml b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
index 6e82062d66..0320e02138 100644
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -16,7 +16,10 @@ chat_template: deepseek_v2
 datasets:
   - path: mlabonne/FineTome-100k
     type: chat_template
-    split: train
+    split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
diff --git a/examples/gemma2/qlora.yml b/examples/gemma2/qlora.yml
index b6dd653750..00e6d84e0d 100644
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -11,8 +11,11 @@ chat_template: gemma
 datasets:
   - path: cgato/SlimOrcaDedupCleaned
     type: chat_template
-    chat_template: gemma
     drop_system_message: true
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+
 val_set_size: 0.0
 output_dir: ./outputs/out
diff --git a/examples/jamba/qlora_fsdp_large.yaml b/examples/jamba/qlora_fsdp_large.yaml
index 28316efd57..84cf906422 100644
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -4,11 +4,15 @@ tokenizer_type: AutoTokenizer
 load_in_4bit: true
 strict: false
 use_tensorboard: true
+chat_template: jamba
 datasets:
   - path: cgato/SlimOrcaDedupCleaned
     type: chat_template
-    chat_template: jamba
     drop_system_message: true
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.0
 output_dir: jamba-large-fsdp-qlora-ft
diff --git a/examples/llama-3/fft-8b-liger-fsdp.yaml b/examples/llama-3/fft-8b-liger-fsdp.yaml
index e84d221f85..99ba63fcc6 100644
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -14,6 +14,10 @@ datasets:
   - path: mlabonne/FineTome-100k
     type: chat_template
     split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.02
 output_dir: ./outputs/out
diff --git a/examples/phi/lora-3.5.yaml b/examples/phi/lora-3.5.yaml
index 59d667b8db..246701148c 100644
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -10,7 +10,6 @@ chat_template: phi_3
 datasets:
   - path: fozziethebeat/alpaca_messages_2k_test
     type: chat_template
-    chat_template: phi_3
     field_messages: messages
     message_field_role: role
     message_field_content: content
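
Taken together, these changes migrate every example from the legacy `sharegpt` dataset type to `chat_template`: the `chat_template` key moves to the top level of the config, the per-dataset `chat_template` key is removed, and the message layout of each dataset is declared explicitly. A minimal sketch of the resulting stanza for a ShareGPT-style dataset, assembled from the examples above (the template name and dataset path are illustrative; datasets such as `fozziethebeat/alpaca_messages_2k_test` store `messages` with `role`/`content` keys instead, as in the phi example):

```yaml
# Sketch of the migrated pattern; values are taken from the examples above,
# adjust them to your own data.
chat_template: gemma                   # named once at the top level
datasets:
  - path: cgato/SlimOrcaDedupCleaned   # ShareGPT-style dataset
    type: chat_template                # replaces the legacy `sharegpt` type
    field_messages: conversations      # column holding the list of turns
    message_field_role: from           # per-message key naming the speaker
    message_field_content: value       # per-message key holding the text
```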