Commit

added scripts

huseinzol05 committed Nov 10, 2024
1 parent a4b3a8f commit e5383a5
Showing 4 changed files with 76 additions and 5 deletions.
10 changes: 5 additions & 5 deletions session/llama3/lora-256-8b.sh → session/llama3/lora-128-8b.sh
@@ -1,18 +1,18 @@
-WANDB_PROJECT=unsloth-Meta-Llama-3.1-8B-Instruct-lora-128-embedding-16k-multipack \
+WANDB_PROJECT=unsloth-Meta-Llama-3.1-8B-Instruct-lora-128-embedding-8k-multipack \
 deepspeed run-instruction-lora-embedding-multipack.py \
 --deepspeed ds_config_zero3.json \
 --model_name_or_path unsloth/Meta-Llama-3.1-8B-Instruct \
---per_device_train_batch_size 1 \
+--per_device_train_batch_size 4 \
 --gradient_accumulation_steps 6 \
---output_dir unsloth-Meta-Llama-3.1-8B-Instruct-lora-128-embedding-16k-multipack \
+--output_dir unsloth-Meta-Llama-3.1-8B-Instruct-lora-128-embedding-8k-multipack \
 --bf16 \
 --do_train \
 --do_eval false \
 --num_train_epochs 5 \
---train_file 'malaysian-llama3.1-24k-language-multipack' \
+--train_file 'malaysian-llama3.1-8k-language-multipack' \
 --logging_steps 1 \
 --learning_rate 2e-5 \
---embedding_learning_rate 5e-6 \
+--learning_rate 2e-5 \
 --weight_decay 0.01 \
 --block_size 24576 \
 --save_steps 20 \
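Net effect of this rename and edit: the run moves from the 24k-context language multipack dataset to the 8k one, the per-device batch size rises from 1 to 4, and the separate --embedding_learning_rate 5e-6 is dropped in favor of a second plain --learning_rate 2e-5. A minimal shell sketch of the resulting effective batch, assuming deepspeed launches one rank per GPU on an 8-GPU node (the world size is an assumption, not stated in the script):

N_GPU=${N_GPU:-8}   # assumption: set to however many ranks deepspeed actually launches
PER_DEVICE=4        # --per_device_train_batch_size after this change
ACCUM=6             # --gradient_accumulation_steps
BLOCK=24576         # --block_size, the maximum packed sequence length
echo "sequences per optimizer step: $((N_GPU * PER_DEVICE * ACCUM))"
echo "max tokens per optimizer step: $((N_GPU * PER_DEVICE * ACCUM * BLOCK))"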
25 changes: 25 additions & 0 deletions session/llama3/lora-256-smollm2-360m.sh
@@ -0,0 +1,25 @@
WANDB_PROJECT="lora-embedding-256-HuggingFaceTB-SmolLM2-360M-Instruct-multipack" \
TORCH_DISTRIBUTED_DEBUG="info" \
torchrun --nproc_per_node 2 \
-m run-instruction-lora-embedding-multipack \
--model_name_or_path HuggingFaceTB/SmolLM2-360M-Instruct \
--per_device_train_batch_size 6 \
--gradient_accumulation_steps 4 \
--output_dir lora-embedding-256-HuggingFaceTB-SmolLM2-360M-Instruct-multipack \
--bf16 --do_train --do_eval false --num_train_epochs 5 \
--train_file /home/husein/ssd4/continue-training/packing-4096 \
--logging_steps 1 \
--learning_rate 2e-5 \
--learning_rate 2e-5 \
--weight_decay 0.01 \
--block_size 24576 \
--save_steps 100 \
--save_total_limit 3 \
--gradient_checkpointing true \
--neftune_noise_alpha 5.0 \
--torch_dtype bfloat16 \
--rank 256 \
--ddp_find_unused_parameters false \
--include_num_input_tokens_seen true \
--dataloader_num_workers 3 \
--dataloader_prefetch_factor 4
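Note the repeated --learning_rate 2e-5 in the committed file: with argparse-style parsing (as in HfArgumentParser) the last occurrence wins, so the duplicate is harmless, presumably a leftover from dropping --embedding_learning_rate as in the 8B script above. With --save_steps 100 and --save_total_limit 3 the output directory keeps only the three newest checkpoints; a minimal sketch for finding the latest one to hand to --resume_from_checkpoint (a standard HF TrainingArguments field; its exposure by run-instruction-lora-embedding-multipack is an assumption):

OUT=lora-embedding-256-HuggingFaceTB-SmolLM2-360M-Instruct-multipack
# newest HF Trainer checkpoint, relying on the checkpoint-<step> naming convention
LATEST=$(ls -d "$OUT"/checkpoint-* 2>/dev/null | sort -V | tail -n 1)
echo "resume candidate: ${LATEST:-none yet}"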
22 changes: 22 additions & 0 deletions session/translation/end-to-end/nanot5-base-coding.sh
@@ -0,0 +1,22 @@
WANDB_PROJECT="nanot5-base-malaysian-cased-translation-v2-coding" \
CUDA_VISIBLE_DEVICES=0 \
python3.10 run_t5_v2.py \
--model_name_or_path mesolitica/nanot5-base-malaysian-translation-v2 \
--num_train_epochs 2 \
--eval_steps 1000000000 \
--logging_steps 2 \
--save_steps 200 \
--save_total_limit 3 \
--do_train \
--train_file mosaic-coding \
--output_dir nanot5-base-malaysian-cased-translation-v2-coding \
--dataloader_num_workers=10 \
--per_device_train_batch_size=2 \
--per_device_eval_batch_size=3 \
--gradient_accumulation_steps=8 \
--max_source_length 2048 \
--max_target_length 2048 \
--learning_rate 2e-5 \
--gradient_checkpointing true \
--weight_decay 0.01 \
--bf16
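On a single GPU this gives an effective batch of 2 x 8 = 16 source/target pairs per step, with both sides truncated at 2048 tokens, and the huge --eval_steps is moot since --do_eval is never passed. A smoke-run sketch before committing to the full 2 epochs; --max_steps and WANDB_MODE=disabled are standard HF Trainer and wandb knobs, and their support in run_t5_v2.py is an assumption:

WANDB_MODE=disabled CUDA_VISIBLE_DEVICES=0 \
python3.10 run_t5_v2.py \
    --model_name_or_path mesolitica/nanot5-base-malaysian-translation-v2 \
    --do_train --train_file mosaic-coding \
    --output_dir /tmp/nanot5-base-coding-smoke \
    --max_steps 5 --logging_steps 1 \
    --per_device_train_batch_size 2 \
    --gradient_accumulation_steps 8 \
    --max_source_length 2048 --max_target_length 2048 \
    --learning_rate 2e-5 --bf16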
24 changes: 24 additions & 0 deletions session/translation/end-to-end/nanot5-small-coding.sh
@@ -0,0 +1,24 @@
WANDB_PROJECT="nanot5-small-malaysian-cased-translation-v2-coding" \
CUDA_VISIBLE_DEVICES=1 \
torchrun \
--nproc_per_node 1 \
-m run_t5_v2 \
--model_name_or_path mesolitica/nanot5-small-malaysian-translation-v2 \
--num_train_epochs 2 \
--eval_steps 1000000000 \
--logging_steps 2 \
--save_steps 200 \
--save_total_limit 3 \
--do_train \
--train_file mosaic-coding \
--output_dir nanot5-small-malaysian-cased-translation-v2-coding \
--dataloader_num_workers=10 \
--per_device_train_batch_size=2 \
--per_device_eval_batch_size=3 \
--gradient_accumulation_steps=8 \
--max_source_length 2048 \
--max_target_length 2048 \
--learning_rate 2e-5 \
--gradient_checkpointing true \
--weight_decay 0.01 \
--bf16
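The small variant mirrors the base script, swapping the model and output directory and launching through torchrun with a single process, which behaves like a plain python launch. Since the base job pins CUDA_VISIBLE_DEVICES=0 and this one pins 1, the two can run concurrently on a two-GPU box; a minimal sketch (the log file names are assumptions):

cd session/translation/end-to-end
nohup bash nanot5-base-coding.sh > nanot5-base-coding.log 2>&1 &
nohup bash nanot5-small-coding.sh > nanot5-small-coding.log 2>&1 &
wait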
