# config_small.yml
# Model info
# google/gemma-2b-it
# Link: https://huggingface.co/google/gemma-2b-it
# google/gemma-7b-it
# Link: https://huggingface.co/google/gemma-7b-it
# meta-llama/Meta-Llama-3-8B-Instruct
# Link: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
model_name : "google/gemma-2b-it"
# Dataset info
dataset_hf : "shuyuej/PodGPT-Demo-Data"
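# Illustrative sketch only (not part of this config, and not necessarily how the PodGPT
# code loads things): `model_name` and `dataset_hf` could be loaded with the standard
# `transformers` and `datasets` APIs, e.g.
#
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from datasets import load_dataset
#
#   tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
#   model = AutoModelForCausalLM.from_pretrained(
#       "google/gemma-2b-it",
#       torch_dtype=torch.bfloat16,  # matches `bf16 : True` below
#       device_map="auto",           # matches `device_map : "auto"` below
#   )
#   dataset = load_dataset("shuyuej/PodGPT-Demo-Data")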
# Hugging Face `read` and `write` tokens. Please replace them with yours.
# `read` token: for downloading models
# `write` token: for uploading your models to Hugging Face
# For your information: https://huggingface.co/settings/tokens
hf_read_token : "YOUR_HUGGING_FACE_READ_TOKEN" # Hugging Face `read` Token
hf_write_token : "YOUR_HUGGING_FACE_WRITE_TOKEN" # Hugging Face `write` Token
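# Illustrative sketch only (assumed usage, not the repository's actual code): the `read`
# token authenticates downloads of gated models, and the `write` token authenticates
# uploads of your trained model, e.g.
#
#   from huggingface_hub import login
#   login(token="YOUR_HUGGING_FACE_READ_TOKEN")   # enables gated model/dataset downloads
#   # ... after training ...
#   model.push_to_hub("your-username/your-model", token="YOUR_HUGGING_FACE_WRITE_TOKEN")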
# Evaluate the original pre-trained model's performance
eval_pretrain : False
# Saving path
result_dir : "./results"
response_dir : "./Final-Results"
save_dir : "./save_folder"
data_save_dir : "./save_folder/data"
# Maximum training sequence length and number of newly generated tokens
train_max_len : 2048
max_new_tokens : 1024
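# Illustrative sketch only (assumed usage): `train_max_len` would typically cap the
# tokenized sequence length during training, e.g.
#
#   encoded = tokenizer(example_text, truncation=True, max_length=2048)  # train_max_len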
# Batch size
train_batch_size : 2
# Number of training epochs
epochs : 5
# Optimizer, Learning rate schedule, warm-up ratio
optim : "adamw_torch"
lr_scheduler_type : "cosine"
warmup_ratio : 0.03
# Activation checkpointing
# When enabled, a lot of memory can be freed at the cost of a small decrease in training speed,
# due to recomputing parts of the graph during back-propagation.
gradient_checkpointing : True
# Number of update steps to accumulate the gradients
gradient_accumulation_steps : 1
# Initial learning rate
learning_rate : 0.000005
# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay : 0.01
# Checkpoint saving strategy ("epoch": save at the end of every epoch; "steps": save every N steps)
save_strategy : "epoch"
# Whether to save only the model at each checkpoint, instead of also saving the optimizer and scheduler states
save_only_model : True
# Number of total saved checkpoints
save_total_limit : 10
# Log every X update steps
logging_steps : 1
# Logging platform
log_save_platform : "tensorboard"
# (Model predictions and checkpoints are stored under the saving paths defined above.)
# Enable fp16/bf16 training (set bf16 to True with an A100)
# Whether to use fp16 16-bit (mixed) precision training instead of 32-bit training.
fp16 : False
# Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. Requires Ampere or higher
# NVIDIA architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change.
bf16 : True
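# Illustrative sketch only: the training hyperparameters above map naturally onto
# `transformers.TrainingArguments`; the exact wiring in PodGPT may differ, and
# `output_dir` is assumed here to correspond to `save_dir`.
#
#   from transformers import TrainingArguments
#
#   args = TrainingArguments(
#       output_dir="./save_folder",       # assumed: save_dir
#       num_train_epochs=5,               # epochs
#       per_device_train_batch_size=2,    # train_batch_size
#       optim="adamw_torch",
#       lr_scheduler_type="cosine",
#       warmup_ratio=0.03,
#       gradient_checkpointing=True,
#       gradient_accumulation_steps=1,
#       learning_rate=5e-6,               # 0.000005
#       weight_decay=0.01,
#       save_strategy="epoch",
#       save_only_model=True,
#       save_total_limit=10,
#       logging_steps=1,
#       report_to="tensorboard",          # log_save_platform
#       fp16=False,
#       bf16=True,
#   )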
# Device placement ("auto" automatically spreads the model across the available GPUs)
device_map : "auto"
# The number of GPUs and GPU utilization for the vLLM Engine
# https://docs.vllm.ai/en/latest/serving/distributed_serving.html
num_gpus_vllm : 1
gpu_utilization_vllm : 0.75
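# Illustrative sketch only (assumed usage): `num_gpus_vllm` and `gpu_utilization_vllm`
# correspond to vLLM's `tensor_parallel_size` and `gpu_memory_utilization`, and
# `max_new_tokens` can cap generation length during evaluation, e.g.
#
#   from vllm import LLM, SamplingParams
#
#   llm = LLM(
#       model="google/gemma-2b-it",
#       tensor_parallel_size=1,        # num_gpus_vllm
#       gpu_memory_utilization=0.75,   # gpu_utilization_vllm
#   )
#   sampling = SamplingParams(max_tokens=1024)  # max_new_tokens
#   outputs = llm.generate(["Example evaluation prompt"], sampling)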