accelerated-peft-autogptq-foak-sample-configuration.yaml
# FMS Acceleration Plugin Configuration.
#
# Each stanza incorporates various configurations for
# different fine-tuning / training tasks.
plugins:
  # PEFT-related acceleration
  peft:

    # quantization-related acceleration
    # e.g., kernels for quantized base weights
    quantization:

      # AutoGPTQ quantized base weights.
      auto_gptq:

        # Kernel to be used for the GPTQ linear layer.
        # NOTE: Not all kernels are suitable for PEFT training; use only
        # kernels that support autograd forward / backward. The best
        # recommendation at the moment is "triton_v2".
        kernel: triton_v2

        # If true, an already-quantized checkpoint is expected to be
        # passed into TrainingArguments.model_name_or_path
        from_quantized: true

        # If false, GPTQ-LoRA is created using the local autogptq package.
        # If true, the legacy implementation of GPTQ-LoRA using the external
        # `auto_gptq` package is used. Refer to the README for installation
        # instructions.
        use_external_lib: false
      fused_ops_and_kernels:

        # load unsloth optimizations for these 4-bit base layer weights.
        # currently only "auto_gptq" and "bitsandbytes" are supported
        base_layer: auto_gptq

        # activate various unsloth optimizations.
        # There are two versions of the plugin:
        # - the FastKernel version supports individual kernels
        # - the FastQuantized version is all-or-nothing

        # fused kernels for LoRA linear layers
        fused_lora: true

        # fast loss triton kernels
        fast_loss: true

        # fast RMSNorm triton kernels
        fast_rms_layernorm: true

        # fast RoPE embedding triton kernels
        fast_rope_embeddings: true
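
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the sample configuration itself, kept as
# comments so this file remains valid YAML): once the file is saved locally,
# the nested stanzas above can be read back with PyYAML to confirm the dotted
# key paths the plugins are configured under, e.g.
# "peft.quantization.auto_gptq.kernel". The file path below is an assumption;
# point it at wherever this configuration is stored.
#
#   import yaml
#
#   with open("accelerated-peft-autogptq-foak-sample-configuration.yaml") as f:
#       cfg = yaml.safe_load(f)
#
#   # walk the nested stanzas down to the AutoGPTQ settings
#   auto_gptq = cfg["plugins"]["peft"]["quantization"]["auto_gptq"]
#   assert auto_gptq["kernel"] == "triton_v2"
#   assert auto_gptq["from_quantized"] is True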