forked from krasserm/perceiver-io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.sh
24 lines (24 loc) · 782 Bytes
/
train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Runs the `fit` subcommand of the perceiver.scripts.text.clm module with a
# fixed set of model, data, optimizer, trainer and logger options.
#
# The option list is accumulated in the positional parameters, one option
# group per statement, so that each group can carry its own comment. Python
# receives the options in exactly the order they are appended below.
# Arguments passed to this script itself are not forwarded.
# The script's exit status is that of the python process.

# Subcommand (also resets the positional parameters).
set -- fit

# --model.* options.
set -- "$@" \
  --model.max_latents=512 \
  --model.cross_attention_dropout=0.5 \
  --model.post_attention_dropout=0.0

# Data module selection and --data.* options.
set -- "$@" \
  --data=WikiTextDataModule \
  --data.tokenizer=deepmind/language-perceiver \
  --data.add_special_tokens=false \
  --data.max_seq_len=4096 \
  --data.task=clm \
  --data.batch_size=24 \
  --data.padding_side=left \
  --data.random_train_shift=true

# Optimizer selection, learning rate, and LR-scheduler warmup.
set -- "$@" \
  --optimizer=Adam \
  --optimizer.lr=2e-4 \
  --lr_scheduler.warmup_steps=200

# --trainer.* options (step budget, devices, validation, gradient handling).
set -- "$@" \
  --trainer.max_steps=20000 \
  --trainer.accelerator=gpu \
  --trainer.devices=2 \
  --trainer.val_check_interval=0.5 \
  --trainer.gradient_clip_val=0.5 \
  --trainer.accumulate_grad_batches=2

# Logger selection and its output location/name.
set -- "$@" \
  --trainer.logger=TensorBoardLogger \
  --trainer.logger.save_dir=logs \
  --trainer.logger.name=clm

python -m perceiver.scripts.text.clm "$@"