Skip to content

Commit

Permalink
feat: Add extra metadata. Update Slurm scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugoch committed Sep 27, 2024
1 parent c8c1687 commit 609ae8c
Show file tree
Hide file tree
Showing 9 changed files with 1,184 additions and 112 deletions.
73 changes: 38 additions & 35 deletions extra/slurm/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,47 @@

def main():
    """Submit one heterogeneous Slurm benchmark job per (pass, model, engine).

    Each sbatch submission is a two-component hetjob: component 0 gets the
    GPUs needed to serve the model, component 1 gets a single GPU's worth of
    CPU/memory to run the benchmark client. Exits with status 1 on the first
    failed submission.
    """
    # (model repo id, number of GPUs required to serve it)
    models = [
        ('meta-llama/Llama-3.1-8B-Instruct', 1),
        ('meta-llama/Llama-3.1-70B-Instruct', 4),
        ('mistralai/Mixtral-8x7B-Instruct-v0.1', 2),
        ('neuralmagic/Meta-Llama-3-70B-Instruct-FP8', 2),
    ]
    num_passes = 2  # repeat every benchmark to smooth run-to-run variance
    engines = ['tgi', 'vllm']
    for i in range(num_passes):
        for model_id, gpus in models:
            print(f"PASS {i} - Submitting job for {model_id}")
            cpus_per_task = gpus * CPUS_PER_GPU
            for engine in engines:
                job_name = f'bench_{model_id.replace("/", "_")}_{engine}_pass_{i}'
                # ':' separates the two hetjob components (server : client).
                args = ['sbatch',
                        '--job-name', job_name,
                        '--output', '/fsx/%u/logs/%x-%j.log',
                        '--time', '1:50:00',
                        '--qos', 'normal',
                        '--partition', 'hopper-prod',
                        '--gpus', str(gpus),
                        '--ntasks', '1',
                        '--cpus-per-task', str(cpus_per_task),
                        '--mem-per-cpu', f'{MEM_PER_CPU_GB}G',
                        '--nodes', '1',
                        ':',
                        '--gpus', '1',
                        '--ntasks', '1',
                        '--cpus-per-task', str(CPUS_PER_GPU),
                        '--mem-per-cpu', f'{MEM_PER_CPU_GB}G',
                        '--nodes', '1',
                        f'{engine}.slurm']
                env = os.environ.copy()
                env['MODEL'] = model_id  # consumed by the .slurm script
                process = subprocess.run(args, capture_output=True, env=env)
                print(process.stdout.decode())
                print(process.stderr.decode())
                if process.returncode != 0:
                    print(f'Error while submitting :: {args}')
                    raise SystemExit(1)


if __name__ == '__main__':
Expand Down
14 changes: 10 additions & 4 deletions extra/slurm/tgi.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
#SBATCH hetjob
#SBATCH --gpus 1 --ntasks 1 --cpus-per-task 11 --mem-per-cpu 20G --nodes=1


if [ -z "$MODEL" ]; then
echo "MODEL environment variable is not set"
exit 1
Expand All @@ -17,7 +16,10 @@ fi
echo "Starting TGI benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
export PORT=8090

# set a random available port to avoid conflicts
PORT=$(shuf -i 8000-9999 -n 1)
export PORT

echo "Model will run on ${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}"
echo "Benchmark will run on ${SLURM_JOB_NODELIST_HET_GROUP_1}"
Expand All @@ -38,7 +40,7 @@ srun --het-group=0 \
--cuda-graphs="1,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120,128"&

# wait until /health is available, die after 10 minutes
timeout 300 bash -c "while [[ \"\$(curl -s -o /dev/null -w '%{http_code}' http://localhost:${PORT}/health)\" != \"200\" ]]; do sleep 1 && echo \"Waiting for TGI to start...\"; done" || exit 1
timeout 600 bash -c "while [[ \"\$(curl -s -o /dev/null -w '%{http_code}' http://localhost:${PORT}/health)\" != \"200\" ]]; do sleep 1 && echo \"Waiting for TGI to start...\"; done" || exit 1
exit_code=$?

RESULTS_DIR="/fsx/$USER/benchmarks_results/tgi"
Expand All @@ -47,6 +49,7 @@ mkdir -p "${RESULTS_DIR}"
if [[ $exit_code != 124 ]]; then
# run benchmark
echo "Starting benchmark"
VERSION=$(curl -s http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}/info | jq -r '.version')
srun --het-group=1 \
-u \
-n 1 \
Expand All @@ -59,9 +62,12 @@ if [[ $exit_code != 124 ]]; then
--url "http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}" \
--duration 120s \
--warmup 30s \
--num-rates 30 \
--benchmark-kind rate \
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"TGI\"" \
--no-console
fi

Expand Down
12 changes: 9 additions & 3 deletions extra/slurm/vllm.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ fi
echo "Starting vLLM benchmark for $MODEL"
export RUST_BACKTRACE=full
export RUST_LOG=text_generation_inference_benchmark=info
export PORT=8090
# set a random available port to avoid conflicts
PORT=$(shuf -i 8000-9999 -n 1)
export PORT

echo "Model will run on ${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}"
echo "Benchmark will run on ${SLURM_JOB_NODELIST_HET_GROUP_1}"
Expand All @@ -37,7 +39,7 @@ srun --het-group=0 \
--tensor-parallel-size "${SLURM_GPUS_ON_NODE}"&

# wait until /health is available, die after 10 minutes
timeout 300 bash -c "while [[ \"\$(curl -s -o /dev/null -w '%{http_code}' http://localhost:${PORT}/health)\" != \"200\" ]]; do sleep 1 && echo \"Waiting for vLLM to start...\"; done" || exit 1
timeout 600 bash -c "while [[ \"\$(curl -s -o /dev/null -w '%{http_code}' http://localhost:${PORT}/health)\" != \"200\" ]]; do sleep 1 && echo \"Waiting for vLLM to start...\"; done" || exit 1
exit_code=$?

RESULTS_DIR="/fsx/$USER/benchmarks_results/vllm"
Expand All @@ -46,6 +48,7 @@ mkdir -p "${RESULTS_DIR}"
if [[ $exit_code != 124 ]]; then
# run benchmark
echo "Starting benchmark"
VERSION=$(curl -s http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}/version | jq -r '.version')
srun --het-group=1 \
-u \
-n 1 \
Expand All @@ -58,9 +61,12 @@ if [[ $exit_code != 124 ]]; then
--url "http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}" \
--duration 120s \
--warmup 30s \
--num-rates 30 \
--benchmark-kind rate \
--rates 0.8 --rates 1.6 --rates 2.4 --rates 3.2 --rates 4.0 --rates 4.8 --rates 5.6 --rates 6.4 --rates 7.2 --rates 8.0 --rates 8.8 --rates 9.6 --rates 10.4 --rates 11.2 --rates 12.0 --rates 12.8 --rates 13.6 --rates 14.4 --rates 15.2 --rates 16.0 --rates 16.8 --rates 17.6 --rates 18.4 --rates 19.2 --rates 20.0 --rates 20.8 --rates 21.6 --rates 22.4 --rates 23.2 --rates 24.0 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--extra-meta "version=$VERSION" \
--extra-meta "engine=\"vLLM\"" \
--no-console
fi

Expand Down
30 changes: 17 additions & 13 deletions plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
pd.options.mode.copy_on_write = True


def plot(data_files: dict[str, str]):
def plot(model:str,data_files: dict[str, str]):
df = pd.DataFrame()
# Load the results
for key, filename in data_files.items():
Expand All @@ -23,15 +23,16 @@ def plot(data_files: dict[str, str]):
entry['engine'] = key
del entry['config']
df = pd.concat([df, pd.DataFrame(entry, index=[0])])

# Filter the results
constant_rate = df[
(df['executor_type'] == 'ConstantArrivalRate') & (df['id'] != 'warmup') & (df['id'] != 'throughput')]
constant_vus = df[(df['executor_type'] == 'ConstantVUs') & (df['id'] != 'warmup') & (df['id'] != 'throughput')]
if len(constant_rate) > 0:
plot_inner('Requests/s', 'rate', constant_rate, 'Constant Rate benchmark')
plot_inner('Requests/s', 'rate', constant_rate, f'Constant Rate benchmark\n{model}')
plt.savefig(f'{directory}/{model}_constant_rate.png')
if len(constant_vus) > 0:
plot_inner('VUs', 'max_vus', constant_vus, 'Constant VUs benchmark')
plot_inner('VUs', 'max_vus', constant_vus, f'Constant VUs benchmark\n{model}')
plt.savefig(f'{directory}/{model}_constant_vus.png')


def plot_inner(x_title, x_key, results, chart_title):
Expand Down Expand Up @@ -59,7 +60,7 @@ def plot_inner(x_title, x_key, results, chart_title):
for i, engine in enumerate(results['engine'].unique()):
df_sorted = results[results['engine'] == engine].sort_values(by=x_key)
ax.plot(df_sorted[x_key], df_sorted[metric], marker='o', markersize=2,
color=colors[i % len(colors)] if engine != 'tgi' else '#FF9D00',
color=colors[i % len(colors)] if not engine.lower().startswith('tgi') else '#FF9D00',
label=f"{engine}")
ax.set_title(title)
ax.tick_params(axis='x', rotation=0)
Expand All @@ -80,14 +81,17 @@ def plot_inner(x_title, x_key, results, chart_title):
ax.legend(title='Engine', loc='upper right')
plt.suptitle(chart_title, fontsize=16)

plt.show()
#plt.show()


if __name__ == '__main__':
    results_dir = 'results'
    # One sub-directory per model; each holds that model's result JSON files.
    directories = [f'{results_dir}/{d}' for d in os.listdir(results_dir)
                   if os.path.isdir(f'{results_dir}/{d}')]
    # NOTE: `directory` is deliberately module-level — plot() reads it as a
    # global when saving its PNG outputs.
    for directory in directories:
        data_files = {}
        for filename in os.listdir(directory):
            if filename.endswith('.json'):
                # key = engine label (the filename segment just before ".json"),
                # value = full path to that engine's results file
                data_files[filename.split('.')[-2]] = f'{directory}/{filename}'
        plot(directory.split('/')[-1], data_files)
Loading

0 comments on commit 609ae8c

Please sign in to comment.