Skip to content

Commit

Permalink
feat: working Slurm benchmarks
Browse files: browse the repository at this point in the history
  • Loading branch information
Hugoch committed Sep 20, 2024
1 parent aa6a38f commit 36f9329
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 8 deletions.
4 changes: 2 additions & 2 deletions extra/slurm/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

def main():
models = [
- # ('meta-llama/Meta-Llama-3.1-8B-Instruct', 1),
+ ('meta-llama/Meta-Llama-3.1-8B-Instruct', 1),
('meta-llama/Meta-Llama-3.1-70B-Instruct', 4),
- # ('mistralai/Mixtral-8x7B-Instruct-v0.1', 2),
+ ('mistralai/Mixtral-8x7B-Instruct-v0.1', 2),
]
engines = ['tgi', 'vllm']
for model in models:
Expand Down
5 changes: 3 additions & 2 deletions extra/slurm/tgi.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ srun --het-group=0 \
/usr/local/bin/text-generation-launcher \
--model-id $MODEL \
--max-concurrent-requests 512 \
+ --max-waiting-tokens 5 \
--cuda-graphs="1,8,16,24,32,40,48,56,64,72,80,88,96,104,112,120,128"&

# wait until /health is available, die after 5 minutes
Expand All @@ -56,9 +57,9 @@ if [[ $exit_code != 124 ]]; then
--tokenizer-name "$MODEL" \
--max-vus 800 \
--url "http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}" \
- --duration 30s \
+ --duration 120s \
--warmup 30s \
- --num-rates 2 \
+ --num-rates 30 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--no-console
Expand Down
4 changes: 2 additions & 2 deletions extra/slurm/vllm.slurm
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ if [[ $exit_code != 124 ]]; then
--tokenizer-name "$MODEL" \
--max-vus 800 \
--url "http://${SLURM_JOB_NODELIST_HET_GROUP_0}:${PORT}" \
- --duration 30s \
+ --duration 120s \
--warmup 30s \
- --num-rates 2 \
+ --num-rates 30 \
--prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" \
--no-console
Expand Down
5 changes: 3 additions & 2 deletions plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,10 @@ def plot_inner(x_title, x_key, results, chart_title):


if __name__ == '__main__':
+ directory='results/llama-70B'
# list json files in results directory
data_files = {}
- for filename in os.listdir('results/llama-70B'):
+ for filename in os.listdir(directory):
if filename.endswith('.json'):
- data_files[filename.split('.')[0]] = f'results/{filename}'
+ data_files[filename.split('.')[0]] = f'{directory}/{filename}'
plot(data_files)

0 comments on commit 36f9329

Please sign in to comment.