-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_eccv_bigfb_model1_debug.sh
52 lines (43 loc) · 1.49 KB
/
train_eccv_bigfb_model1_debug.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/bin/bash
#
# Slurm batch job: debug training run of vfeedbacknet_eccv_bigfb_model1_debug
# on the 20bn-jester (xs) dataset.
#
# Steps: load the toolchain modules, activate the Python virtualenv, log some
# host information, unpack the dataset tarball into $TMPDIR, then launch the
# training script.
#
# Required environment:
#   HOME    - contains .local/venv and projects/vfeedbacknet
#   WORK    - contains datasets/, pretrained-models/ and vfeedbacknet-results/
#   TMPDIR  - directory the dataset is extracted into
#
# Exit status: non-zero when a required variable is missing, the dataset
# cannot be extracted, or the training script fails; 0 otherwise.
#
# NOTE: keep --gres / --cpus-per-task below in agreement with the GPU list,
# --num_gpus and --num_cpus passed to the training script.
#
#SBATCH --job-name=bigfb1_debug_jester
#SBATCH --output=eccv_bigfb_model1_debug_%j.log
#SBATCH --error=eccv_bigfb_model1_debug_%j.log
#
#SBATCH --time=12:00:00
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=5
#SBATCH --mem=64000M
#SBATCH --tmp=64000M
#SBATCH --gres gpu:4

# Report a fatal problem on stderr and end the job with a failing status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Without pipefail a failing `pv` would be masked by the status of `tar`.
# `set -e` / `set -u` are intentionally not enabled: `module` and the
# virtualenv activate script are outside this file and may touch unset
# variables; the critical steps are checked explicitly instead.
set -o pipefail

date
hostname

# Fail early instead of building paths such as "/20bn-jester" from empty
# variables.
for required_var in HOME WORK TMPDIR; do
  [[ -n "${!required_var:-}" ]] || die "${required_var} is not set"
done

printf 'loading modules ... '
module load GCC/4.9.2-binutils-2.25
module load OpenMPI/1.8.5
module load Python/3.6.0
module load tensorflow/1.5.0-cp36
echo 'done!'

printf 'activating python virtualenv ... '
readonly VENV_ACTIVATE="$HOME/.local/venv/bin/activate"
[[ -r "$VENV_ACTIVATE" ]] || die "virtualenv not found: $VENV_ACTIVATE"
# shellcheck source=/dev/null
source "$VENV_ACTIVATE"
echo 'done!'

# Diagnostics only: these are best-effort and never abort the job.
echo '---log system information---'
echo 'num_cpus' "$(nproc)"
nvidia-smi
df -Th
free -h
echo 'done!'

TMPFS=$TMPDIR
printf 'creating data dir %s ... ' "$TMPFS"
mkdir -p "$TMPFS" || die "cannot create data dir: $TMPFS"
echo 'done!'

date
printf 'extracting data into dir: %s ... ' "$TMPFS"
readonly DATASET_TAR="$WORK/datasets/20bn-jester.xs.tar"
[[ -r "$DATASET_TAR" ]] || die "dataset archive not readable: $DATASET_TAR"
if command -v pv >/dev/null 2>&1; then
  # pv only adds a progress meter (written to stderr) to the extraction.
  pv "$DATASET_TAR" | tar --skip-old-files -xf - -C "$TMPFS" \
    || die "failed to extract $DATASET_TAR into $TMPFS"
else
  # No pv available: extract directly rather than feeding tar an empty stream.
  tar --skip-old-files -xf "$DATASET_TAR" -C "$TMPFS" \
    || die "failed to extract $DATASET_TAR into $TMPFS"
fi
echo 'done!'

date
export DATA_ROOT="$TMPFS/20bn-jester"

echo 'running training script'
# Positional arguments, in order: GPU list, model name, results directory.
# The option spellings (including --last_loss_multipler) are those accepted
# by the training script and must not be "corrected" here.
train_args=(
  0,1,2,3
  vfeedbacknet_eccv_bigfb_model1_debug
  "$WORK/vfeedbacknet-results/20bn/vfeedbacknet_eccv_bigfb_model1_debug.xs"
  --video_length=20
  --video_height=112
  --video_width=112
  --video_downsample_ratio=2
  --learning_rate_init=0.1
  --learning_rate_decay=0.998
  --learning_rate_min=0.001
  --global_step_init 0
  --train_batch_size=32
  --prefetch_batch_size=256
  --validation_interval=16
  --last_loss_multipler=4
  --num_gpus=4
  --num_cpus=5
  --pretrain_root_prefix="$WORK/pretrained-models"
)
"$HOME/projects/vfeedbacknet/scripts/jemmons_train_20bn-jester.xs.sh" "${train_args[@]}"
train_status=$?

date
# Propagate the training result so Slurm does not mark a failed run as
# COMPLETED just because the final echo succeeded.
if ((train_status != 0)); then
  printf 'error: training script exited with status %d\n' "$train_status" >&2
  exit "$train_status"
fi
echo 'finshed.'