Skip to content

Commit

Permalink
Enable opt-6.7b benchmark on inf2 (#2400)
Browse files Browse the repository at this point in the history
* Enable opt-6.7b benchmark on inf2

* Add support for inf2 dependency handling

* update benchmark requests and concurrency

---------

Co-authored-by: Naman Nandan <[email protected]>
  • Loading branch information
namannandan and Naman Nandan authored Jun 29, 2023
1 parent 2551a99 commit b260776
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 0 deletions.
2 changes: 2 additions & 0 deletions benchmarks/auto_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ def install_torchserve(skip_ts_install, hw, ts_version):
# install_dependencies.py
if hw == "gpu":
cmd = "python ts_scripts/install_dependencies.py --environment dev --cuda cu117"
elif hw == "neuronx":
cmd = "python ts_scripts/install_dependencies.py --environment dev --neuronx"
else:
cmd = "python ts_scripts/install_dependencies.py --environment dev"
execute(cmd, wait=True)
Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmark_config_neuronx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# or a list of model configure yaml files with full path
models:
- "bert_neuronx.yaml"
- "opt_6.7b_neuronx.yaml"

# benchmark on "cpu", "gpu", "neuron" or "neuronx".
# "cpu" is set if "hardware" is not specified
Expand Down
68 changes: 68 additions & 0 deletions benchmarks/models_config/opt_6.7b_neuronx.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
# Benchmark configurations for OPT-6.7B on AWS Inferentia2 ("neuronx").
# All four entries run the same model archive via apache-bench ("ab"),
# differing only in batch size (1/2/4/8); each archive URL matches its
# batch size, so the pairs below must stay in sync.
opt_6.7b_neuronx_batch_1:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_1.tar.gz
    workers:
      - 1
    batch_delay: 100
    batch_size:
      - 1
    input: "./examples/large_models/inferentia2/sample_text.txt"
    requests: 2000
    concurrency: 10
    backend_profiling: False
    exec_env: "local"
    processors:
      - "neuronx"

opt_6.7b_neuronx_batch_2:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_2.tar.gz
    workers:
      - 1
    batch_delay: 100
    batch_size:
      - 2
    input: "./examples/large_models/inferentia2/sample_text.txt"
    requests: 2000
    concurrency: 10
    backend_profiling: False
    exec_env: "local"
    processors:
      - "neuronx"

opt_6.7b_neuronx_batch_4:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_4.tar.gz
    workers:
      - 1
    batch_delay: 100
    batch_size:
      - 4
    input: "./examples/large_models/inferentia2/sample_text.txt"
    requests: 2000
    concurrency: 10
    backend_profiling: False
    exec_env: "local"
    processors:
      - "neuronx"

opt_6.7b_neuronx_batch_8:
  scripted_mode:
    benchmark_engine: "ab"
    url: https://torchserve.pytorch.org/mar_files/opt_6.7b_neuronx_batch_8.tar.gz
    workers:
      - 1
    batch_delay: 100
    batch_size:
      - 8
    input: "./examples/large_models/inferentia2/sample_text.txt"
    requests: 2000
    concurrency: 10
    backend_profiling: False
    exec_env: "local"
    processors:
      - "neuronx"
7 changes: 7 additions & 0 deletions requirements/neuronx.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Python dependencies for AWS Inferentia2 (Neuron) support.
# The Neuron SDK packages (neuronx-cc, torch-neuronx, transformers-neuronx)
# are hosted on AWS's own package index, hence the extra index URL.
--extra-index-url https://pip.repos.neuron.amazonaws.com
# Pinned supporting packages; presumably pinned for Neuron SDK
# compatibility -- confirm against the Neuron SDK release notes
# before bumping any of them.
numpy==1.21.6
protobuf==3.20.3
grpcio-tools==1.48.2
# Neuron SDK packages are left unpinned so the latest compatible
# release is pulled from the AWS index.
neuronx-cc
torch-neuronx
transformers-neuronx
7 changes: 7 additions & 0 deletions requirements/torch_neuronx_linux.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# CPU-only torch stack installed on Linux for Inferentia2 (neuronx) runs;
# the +cpu wheels come from the PyTorch CPU wheel index below.
# NOTE(review): versions look pinned to match torch-neuronx's supported
# torch release -- confirm against the Neuron SDK docs before upgrading.
--extra-index-url https://download.pytorch.org/whl/cpu
-r torch_common.txt
torch==1.13.1+cpu
torchvision==0.14.1+cpu
torchtext==0.14.1
torchaudio==0.13.1+cpu
torchdata==0.5.1
19 changes: 19 additions & 0 deletions ts_scripts/install_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ def install_torch_packages(self, cuda_version):
os.system(
f"{sys.executable} -m pip install -U -r requirements/torch_{cuda_version}_{platform.system().lower()}.txt"
)
elif args.neuronx:
torch_neuronx_requirements_file = os.path.join(
"requirements", "torch_neuronx_linux.txt"
)
os.system(
f"{sys.executable} -m pip install -U -r {torch_neuronx_requirements_file}"
)
else:
os.system(
f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}.txt"
Expand Down Expand Up @@ -67,6 +74,13 @@ def install_python_packages(self, cuda_version, requirements_file_path, nightly)
gpu_requirements_file = os.path.join("requirements", "common_gpu.txt")
os.system(f"{sys.executable} -m pip install -U -r {gpu_requirements_file}")

# Install dependencies for Inferentia2
if args.neuronx:
neuronx_requirements_file = os.path.join("requirements", "neuronx.txt")
os.system(
f"{sys.executable} -m pip install -U -r {neuronx_requirements_file}"
)

def install_node_packages(self):
os.system(
f"{self.sudo_cmd}npm install -g newman newman-reporter-htmlextra markdown-link-check"
Expand Down Expand Up @@ -193,6 +207,11 @@ def get_brew_version():
choices=["cu92", "cu101", "cu102", "cu111", "cu113", "cu116", "cu117", "cu118"],
help="CUDA version for torch",
)
parser.add_argument(
"--neuronx",
action="store_true",
help="Install dependencies for inferentia2 support",
)
parser.add_argument(
"--environment",
default="prod",
Expand Down

0 comments on commit b260776

Please sign in to comment.