# accelerated-peft-bnb-nf4-foak-sample-configuration.yaml
# FMS Acceleration Plugin Configuration.
#
# Each stanza incorporates various configurations for
# different fine-tuning / training tasks.
plugins:
# PEFT-related acceleration
peft:
# quantization-related acceleration
# e.g., kernels for quantized base weights
quantization:
# For loading BitsAndBytes quantized layers
# to serve as 4bit base-weights for LoRA PEFT-tuning.
# NOTE: currently AutoGPTQ is not properly integrated into huggingface /
# bitsandbytes, so the recommended quant_type is either "nf4"
# or "fp4".
bitsandbytes:
quant_type: nf4
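# ("nf4" is the 4-bit NormalFloat data type introduced with QLoRA;
# "fp4" is the standard 4-bit float alternative.)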
# If true, then get_peft_model and prepare_model_for_kbit_training
# will not be called.
no_peft_model: false
fused_ops_and_kernels:
# load unsloth optimizations for these 4bit base layer weights.
# currently only "auto_gptq" and "bitsandbytes" are supported
base_layer: bitsandbytes
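# (note: base_layer should match the quantization plugin configured above;
# this sample uses the bitsandbytes stanza, hence "bitsandbytes" here)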
# activate various unsloth optimizations.
# there are two versions of the plugin:
# - the FastKernel version lets individual kernels be enabled or disabled
# - the FastQuantized version enables them all-or-nothing
# fused kernels for lora linear layers
fused_lora: true
# fast loss triton kernels
fast_loss: true
# fast rms norm triton kernels
fast_rms_layernorm: true
# fast RoPE embedding triton kernels
fast_rope_embeddings: true
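
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): this sample configuration is passed to an
# acceleration-framework-aware training script so the plugins above can be
# activated. The entry point and flag name below are assumptions based on the
# fms-hf-tuning integration and may differ by version; check that project's
# documentation for the exact interface.
#
#   python -m tuning.sft_trainer \
#     --model_name_or_path <model> \
#     ... \
#     --acceleration_framework_config_file \
#       accelerated-peft-bnb-nf4-foak-sample-configuration.yaml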