# config_small.yml
# Model info
# google/gemma-2b-it
# Link: https://huggingface.co/google/gemma-2b-it
# google/gemma-7b-it
# Link: https://huggingface.co/google/gemma-7b-it
# meta-llama/Meta-Llama-3-8B-Instruct
# Link: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
model_name : "google/gemma-2b-it"
# Dataset info
dataset_hf : "shuyuej/PodGPT-Demo-Data"
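# Illustrative sketch only (not part of this config, and not necessarily how the PodGPT
# code loads things): `model_name` and `dataset_hf` could be loaded with the standard
# `transformers` and `datasets` APIs, e.g.
#
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   from datasets import load_dataset
#
#   tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
#   model = AutoModelForCausalLM.from_pretrained(
#       "google/gemma-2b-it",
#       torch_dtype=torch.bfloat16,  # matches `bf16 : True` below
#       device_map="auto",           # matches `device_map : "auto"` below
#   )
#   dataset = load_dataset("shuyuej/PodGPT-Demo-Data")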
# Hugging Face `read` and `write` tokens. Please replace them with yours.
# `read` token: for downloading models
# `write` token: for uploading your models to Hugging Face
# For your information: https://huggingface.co/settings/tokens
hf_read_token : "YOUR_HUGGING_FACE_READ_TOKEN" # Hugging Face `read` Token
hf_write_token : "YOUR_HUGGING_FACE_WRITE_TOKEN" # Hugging Face `write` Token
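# Illustrative sketch only (assumed usage, not the repository's actual code): the `read`
# token authenticates downloads of gated models, and the `write` token authenticates
# uploads of your trained model, e.g.
#
#   from huggingface_hub import login
#   login(token="YOUR_HUGGING_FACE_READ_TOKEN")   # enables gated model/dataset downloads
#   # ... after training ...
#   model.push_to_hub("your-username/your-model", token="YOUR_HUGGING_FACE_WRITE_TOKEN")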
# Evaluate the original pre-trained model's performance
eval_pretrain : False
# Saving path
result_dir : "./results"
response_dir : "./Final-Results"
save_dir : "./save_folder"
data_save_dir : "./save_folder/data"
# Maximum training sequence length and number of newly generated tokens
train_max_len : 2048
max_new_tokens : 1024
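# Illustrative sketch only (assumed usage): `train_max_len` would typically cap the
# tokenized sequence length during training, e.g.
#
#   encoded = tokenizer(example_text, truncation=True, max_length=2048)  # train_max_len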
# Batch size
train_batch_size : 2
# Number of training epochs
epochs : 5
# Optimizer, Learning rate schedule, warm-up ratio
optim : "adamw_torch"
lr_scheduler_type : "cosine"
warmup_ratio : 0.03
# Activation checkpointing
# When enabled, a lot of memory can be freed at the cost of a small decrease in training speed,
# due to recomputing parts of the graph during back-propagation.
gradient_checkpointing : True
# Number of update steps to accumulate the gradients
gradient_accumulation_steps : 1
# Initial learning rate
learning_rate : 0.000005
# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay : 0.01
# Checkpoint saving strategy ("epoch": save at the end of every epoch; "steps": save every N steps)
save_strategy : "epoch"
# Whether to save only the model at each checkpoint, instead of also saving the optimizer and scheduler states
save_only_model : True
# Number of total saved checkpoints
save_total_limit : 10
# Log every X update steps
logging_steps : 1
# Logging platform
log_save_platform : "tensorboard"
# (Model predictions and checkpoints are stored under the saving paths defined above.)
# Enable fp16/bf16 training (set bf16 to True with an A100)
# Whether to use fp16 16-bit (mixed) precision training instead of 32-bit training.
fp16 : False
# Whether to use bf16 16-bit (mixed) precision training instead of 32-bit training. Requires Ampere or higher
# NVIDIA architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change.
bf16 : True
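# Illustrative sketch only: the training hyperparameters above map naturally onto
# `transformers.TrainingArguments`; the exact wiring in PodGPT may differ, and
# `output_dir` is assumed here to correspond to `save_dir`.
#
#   from transformers import TrainingArguments
#
#   args = TrainingArguments(
#       output_dir="./save_folder",       # assumed: save_dir
#       num_train_epochs=5,               # epochs
#       per_device_train_batch_size=2,    # train_batch_size
#       optim="adamw_torch",
#       lr_scheduler_type="cosine",
#       warmup_ratio=0.03,
#       gradient_checkpointing=True,
#       gradient_accumulation_steps=1,
#       learning_rate=5e-6,               # 0.000005
#       weight_decay=0.01,
#       save_strategy="epoch",
#       save_only_model=True,
#       save_total_limit=10,
#       logging_steps=1,
#       report_to="tensorboard",          # log_save_platform
#       fp16=False,
#       bf16=True,
#   )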
# Device placement ("auto" automatically spreads the model across the available GPUs)
device_map : "auto"
# The number of GPUs and GPU utilization for the vLLM Engine
# https://docs.vllm.ai/en/latest/serving/distributed_serving.html
num_gpus_vllm : 1
gpu_utilization_vllm : 0.75
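# Illustrative sketch only (assumed usage): `num_gpus_vllm` and `gpu_utilization_vllm`
# correspond to vLLM's `tensor_parallel_size` and `gpu_memory_utilization`, and
# `max_new_tokens` can cap generation length during evaluation, e.g.
#
#   from vllm import LLM, SamplingParams
#
#   llm = LLM(
#       model="google/gemma-2b-it",
#       tensor_parallel_size=1,        # num_gpus_vllm
#       gpu_memory_utilization=0.75,   # gpu_utilization_vllm
#   )
#   sampling = SamplingParams(max_tokens=1024)  # max_new_tokens
#   outputs = llm.generate(["Example evaluation prompt"], sampling)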