#!/usr/bin/env bash
set -x
echo "FMS Acceleration Benchmarking Script"
echo "Please run this script as "
echo "bash scripts/run_benchmarks.sh ... from the root of the repo"
# TODO: this can be improved. For now we assume we always run from
# root of repo
WORKING_DIR=scripts/benchmarks
# pointing to the configuration directory of the repo
CONFIG_DIR=sample-configurations
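# A minimal sketch (not part of the original logic) of how the root-of-repo
# assumption in the TODO above could be lifted by resolving paths from this
# script's own location; it assumes the script sits in scripts/ directly under
# the repo root:
#   REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
#   cd "$REPO_ROOT"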
# ------------- MAIN CONFIGS -----------------
SCENARIOS_CONFIG=scenarios.yaml
DEFAULTS_CONFIG=defaults.yaml
ACCELERATE_CONFIG=accelerate.yaml
# ------------- SCENARIO CONFIGS -----------------
# this tag determines the default scenario subset
SCNTAG_PEFT_AUTOGPTQ=accelerated-peft-gptq
# ------------- OTHER CONFIGS -----------------
# data will be cached in here
DATA_CACHE=data
# final result placed here
BENCH_RESULT_FILE=benchmarks.csv
# freeze the pip requirements here
PIP_REQUIREMENTS_FILE=requirements.txt
# ------------- DROP COLUMNS FROM RESULTS -----------------
# env inputs
DRY_RUN=${DRY_RUN:-"false"}
NO_DATA_PROCESSING=${NO_DATA_PROCESSING:-"false"}
NO_OVERWRITE=${NO_OVERWRITE:-"false"}
MEMORY_LOGGING=${MEMORY_LOGGING:-"all"}
# inputs
NUM_GPUS_MATRIX=${1:-"1 2"}
EFFECTIVE_BS_MATRIX=${2:-"4 8"}
RESULT_DIR=${3:-"benchmark_outputs"}
SCENARIOS_CONFIG=${4:-$SCENARIOS_CONFIG}
SCENARIOS_FILTER=${5-$SCNTAG_PEFT_AUTOGPTQ}
echo "NUM_GPUS_MATRIX: $NUM_GPUS_MATRIX"
echo "RESULT_DIR: $RESULT_DIR"
echo "SCENARIOS_CONFIG: $SCENARIOS_CONFIG"
echo "SCENARIOS_FILTER: $SCENARIOS_FILTER"
echo "MEMORY_LOGGING: $MEMORY_LOGGING"
if [ -n "$RESULT_DIR" ]; then
echo "The results directory is not empty. "
if [ "$NO_OVERWRITE" = "true" ]; then
echo "Results dir $RESULT_DIR is not empty, but NO_OVERWRITE=true"
echo "If intending to overwrite please delete the folder manually"
echo "or do not set NO_OVERWRITE"
else
echo "Deleting $RESULT_DIR"
rm -rf $RESULT_DIR
fi
fi
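# For example, to reuse an existing results directory instead of wiping it,
# NO_OVERWRITE can be set when invoking the script (illustrative):
#   NO_OVERWRITE=true bash scripts/run_benchmarks.sh "1 2" "4 8" benchmark_outputs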
# tag on the directories
SCENARIOS_CONFIG=$WORKING_DIR/$SCENARIOS_CONFIG
DEFAULTS_CONFIG=$WORKING_DIR/$DEFAULTS_CONFIG
ACCELERATE_CONFIG=$WORKING_DIR/$ACCELERATE_CONFIG
DATA_CACHE=$RESULT_DIR/$DATA_CACHE
BENCH_RESULT_FILE=$RESULT_DIR/$BENCH_RESULT_FILE
PIP_REQUIREMENTS_FILE=$RESULT_DIR/$PIP_REQUIREMENTS_FILE
# ------------- EXTRA ARGS -----------------
# preload models by default
EXTRA_ARGS="--preload_models"
if [ "$SCENARIOS_FILTER" != "none" ]; then
EXTRA_ARGS="$EXTRA_ARGS --run_only_scenarios $SCENARIOS_FILTER"
fi
if [ "$DRY_RUN" = "true" ]; then
EXTRA_ARGS="$EXTRA_ARGS --dry_run"
fi
if [ "$NO_DATA_PROCESSING" = "true" ]; then
EXTRA_ARGS="$EXTRA_ARGS --no_data_processing"
fi
if [ "$MEMORY_LOGGING" = "huggingface" ]; then
EXTRA_ARGS="$EXTRA_ARGS --log_memory_hf"
elif [ "$MEMORY_LOGGING" = "nvidia" ]; then
EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi"
elif [ "$MEMORY_LOGGING" = "all" ]; then
EXTRA_ARGS="$EXTRA_ARGS --log_nvidia_smi --log_memory_hf"
fi
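# As a sanity check: with the defaults above (scenario filter set to the GPTQ
# tag, DRY_RUN and NO_DATA_PROCESSING unset, MEMORY_LOGGING=all), the assembled
# flags resolve to:
#   --preload_models --run_only_scenarios accelerated-peft-gptq --log_nvidia_smi --log_memory_hf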
# dump out the environment
if [ ! "$NO_OVERWRITE" = "true" ]; then
echo "Creating $RESULT_DIR"
mkdir -p $RESULT_DIR
pip freeze > $PIP_REQUIREMENTS_FILE
fi
# run the bench
PYTHONPATH=. \
    python $WORKING_DIR/benchmark.py \
    --num_gpus $NUM_GPUS_MATRIX \
    --effective_batch_size_matrix $EFFECTIVE_BS_MATRIX \
    --scenarios_config_path $SCENARIOS_CONFIG \
    --accelerate_config $ACCELERATE_CONFIG \
    --defaults_config_path $DEFAULTS_CONFIG \
    --dataset_save_path $DATA_CACHE \
    --results_output_path $RESULT_DIR $EXTRA_ARGS
# produce the final CSV for check-in
# need to set PYTHONPATH because there is an import inside
# this will write to the BENCH_RESULT_FILE
# Remove the columns with values already represented by other metrics in the summary report
PYTHONPATH=. \
    python $WORKING_DIR/display_bench_results.py $RESULT_DIR \
    --result_file $BENCH_RESULT_FILE \
    --keep_columns \
        'torch_dtype' \
        'framework_config' \
        'peft_method' \
        'model_name_or_path' \
        'num_gpus' \
        'per_device_train_batch_size' \
        'mem_nvidia_mem_reserved' \
        'mem_peak_torch_mem_alloc_in_bytes' \
        'mem_torch_mem_alloc_in_bytes' \
        'train_tokens_per_second' \
    --remove_columns \
        'before_init_mem_cpu' \
        'before_init_mem_gpu' \
        'init_mem_cpu_alloc_delta' \
        'init_mem_cpu_peaked_delta' \
        'init_mem_gpu_alloc_delta' \
        'init_mem_gpu_peaked_delta' \
        'train_mem_cpu_alloc_delta' \
        'train_mem_cpu_peaked_delta' \
        'train_mem_gpu_alloc_delta' \
        'train_mem_gpu_peaked_delta' \
        'training_data_path' \
        'error_messages' \
        'acceleration_framework_config_file'
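# Once written, the summary CSV can be inspected directly from the shell with
# standard command-line tools (shown only as a convenience, not part of the
# benchmark flow):
#   column -s, -t < "$BENCH_RESULT_FILE" | less -S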
# For every new benchmark run, it is good practice to perform a regression check
# against a previously known set of benchmark results. This repo provides a convenient comparison
# tool that analyses the differences in metrics like loss and throughput between an old and a new
# set of benchmark results.
# To use this tool, simply run the following Python command:
# PYTHONPATH=. \
# python $WORKING_DIR/compare_with_reference.py
# The following arguments can be used to further configure the analysis; otherwise default values are used.
# arguments:
# --result_dir <Output directory to save comparison artifacts>
# --reference_benchmark_filepath <filepath of the old benchmark results to compare against>
# --threshold_ratio <to define an acceptable difference between old and new results>
# --indices <defines the set of column names used as unique identifier to merge the 2 sets of results>
# --plot_columns <specifies the metric name to be compared and visualized>
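# An illustrative invocation of the comparison tool (the paths and values
# below are example placeholders, not defaults shipped with the repo; the
# optional --indices and --plot_columns arguments can be appended as needed):
#   PYTHONPATH=. \
#   python $WORKING_DIR/compare_with_reference.py \
#       --result_dir benchmark_outputs \
#       --reference_benchmark_filepath previous_benchmark_outputs/benchmarks.csv \
#       --threshold_ratio 0.1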