Skip to content

Commit

Permalink
Add support for ROCm 6.1.2 (#4)
Browse files Browse the repository at this point in the history
Extraneous libraries were being loaded for various backends; this commit
removes them. It also adds support for PyTorch and ONNX Runtime with ROCm 6.1.2.
  • Loading branch information
ashao authored Sep 18, 2024
1 parent 2028721 commit f8255f9
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 7 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ build_torch: $(TORCH_ARCHIVE)

.PHONY: clean_torch
clean_torch:
rm -rf $(TORCH_BUILD_DIR) $(TORCH_ARCHIVE) $(TORCH_INSTALL_DIR)
cd pytorch && git clean -fdx && git restore .
cd pytorch/third_party/kineto && git restore .

Expand All @@ -94,7 +93,8 @@ clean_tensorflow:
rm -rf $(TF_INSTALL_DIR)
cd tensorflow && \
bazel clean --expunge_async && \
git restore .
git restore . && \
git reset --hard

.PHONY: clean_onnxruntime
clean_onnxruntime:
Expand Down
94 changes: 94 additions & 0 deletions architectures/linux-rocm-6.1.2.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@

# BSD 2-Clause License
#
# Copyright (c) 2024, Hewlett Packard Enterprise
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Platform description for this architecture file
OS = linux
ARCHITECTURE = x64

# ROCm release; the stack label below is derived from it
ROCM_VERSION = 6.1.2
STACK = rocm-$(ROCM_VERSION)

# PyTorch options
PYTORCH_VERSION = 2.4.0

# CMake flags, one per line: BUILD_PYTHON, USE_NCCL and USE_CUDA are switched
# off; USE_ROCM, USE_RCCL and USE_STATIC_MKL are switched on. ROCM_SOURCE_DIR
# is taken from ROCM_PATH, which is not assigned in this file.
TORCH_CMAKE_OPTIONS = -DBUILD_PYTHON=OFF
TORCH_CMAKE_OPTIONS += -DUSE_ROCM=ON
TORCH_CMAKE_OPTIONS += -DUSE_RCCL=ON
TORCH_CMAKE_OPTIONS += -DROCM_SOURCE_DIR=$(ROCM_PATH)
TORCH_CMAKE_OPTIONS += -DUSE_NCCL=OFF
TORCH_CMAKE_OPTIONS += -DUSE_CUDA=OFF
TORCH_CMAKE_OPTIONS += -DUSE_STATIC_MKL=ON

# Targets listed as PyTorch prebuild steps: check out the release, then patch it
PYTORCH_PREBUILD_TARGETS = pytorch_rocm_checkout
PYTORCH_PREBUILD_TARGETS += pytorch_rocm_prebuild


# TensorFlow options
TF_VERSION = 2.15
# Ref fetched by tf_rocm_checkout: r<version>-rocm-enhanced from the ROCm fork
TF_TAG = r$(TF_VERSION)-rocm-enhanced
TF_REMOTE = https://github.com/ROCm/tensorflow-upstream.git

# Targets listed as TensorFlow prebuild steps: fetch the ROCm ref, then configure
TF_PREBUILD_TARGETS = tf_rocm_checkout
TF_PREBUILD_TARGETS += tf_rocm_prebuild

# Bazel options for the TensorFlow build
TF_BAZEL_OPTS = --config=opt
TF_BAZEL_OPTS += --verbose_failures

# ONNX Runtime options
ONNXRT_VERSION = 1.17.3

# Build with the ROCm backend, using the ROCm installation at ROCM_PATH
ONNXRT_OPTIONS = --use_rocm
ONNXRT_OPTIONS += --rocm_home $(ROCM_PATH)

# ONNX Runtime only needs a checkout; there are no further prebuild steps
ONNXRT_PREBUILD_TARGETS = onnxrt_checkout

# From PyTorch for ROCm instructions
# https://github.com/pytorch/pytorch/blob/v2.3.1/README.md?plain=1#L241-L245
# For ROCm 5.5.0 and later, one of the ATen files also needs to be patched
# Check out the v$(PYTORCH_VERSION) tag inside the pytorch/ checkout, bring
# its submodules in line with that tag, and discard local modifications to
# tracked files. Every step is chained with && so a failure stops the recipe.
# Declared phony: this is a command, not a file, and must run even if a file
# named pytorch_rocm_checkout happens to exist.
.PHONY: pytorch_rocm_checkout
pytorch_rocm_checkout:
	cd pytorch && \
	git checkout v$(PYTORCH_VERSION) && \
	git submodule update --init --recursive && \
	git reset --hard

# Prepare the PyTorch sources for a ROCm build:
#   1. run tools/amd_build/build_amd.py from inside the pytorch/ checkout
#      (cd and the script are joined with && so the script never runs from the
#      wrong directory if pytorch/ is missing);
#   2. replace `attr.memoryType` with `attr.type` in ATen's HIPHooks.cpp;
#   3. replace the hard-coded `/opt/rocm` prefix in kineto's CMakeLists.txt
#      with the value of ROCM_PATH (expanded by make before the shell runs);
#   4. replace every literal `.` in cmake/public/LoadHIP.cmake with `\.`.
# NOTE(review): step 4 rewrites all dots in the file and is not idempotent
# (running it twice adds a second backslash) -- confirm this is intended.
# Declared phony: this is a command, not a file.
.PHONY: pytorch_rocm_prebuild
pytorch_rocm_prebuild:
	cd pytorch && python tools/amd_build/build_amd.py
	sed -i 's/attr.memoryType/attr.type/g' pytorch/aten/src/ATen/hip/detail/HIPHooks.cpp
	sed -i 's,/opt/rocm,$(ROCM_PATH),g' pytorch/third_party/kineto/libkineto/CMakeLists.txt
	sed -i 's,\.,\\.,g' pytorch/cmake/public/LoadHIP.cmake

# (1) Restore the pristine .bazelrc in the tensorflow/ checkout. The ROCm
#     .bazelrc patch (../patches/tensorflow/bazelrc.rocm.patch, intended to
#     avoid hard-coded paths to Clang) is currently NOT applied.
# (2) Run TensorFlow's configure.py non-interactively, passing its answers as
#     environment variables: ROCm on, CUDA and Clang off, no Android workspace,
#     the default Python library path, and whichever `python` is first on PATH.
# Each step enters tensorflow/ with && so nothing runs in the wrong directory
# if the checkout is missing.
# Declared phony: this is a command, not a file.
.PHONY: tf_rocm_prebuild
tf_rocm_prebuild:
	cd tensorflow && \
	git restore .bazelrc
	cd tensorflow && \
	USE_DEFAULT_PYTHON_LIB_PATH=1 \
	PYTHON_BIN_PATH=$$(which python) \
	TF_NEED_CLANG=0 \
	TF_NEED_ROCM=1 \
	TF_NEED_CUDA=0 \
	CC_OPT_FLAGS="-Wno-sign-compare -B/usr/bin" \
	TF_SET_ANDROID_WORKSPACE=0 \
	python configure.py

# Fetch $(TF_TAG) from $(TF_REMOTE) into the tensorflow/ checkout and switch
# the working tree to the fetched commit (FETCH_HEAD).
# The cd is chained with && on purpose: if tensorflow/ is missing, the fetch
# and checkout must not run against the enclosing repository.
# Declared phony: this is a command, not a file.
.PHONY: tf_rocm_checkout
tf_rocm_checkout:
	cd tensorflow && \
	git fetch $(TF_REMOTE) $(TF_TAG) && \
	git checkout FETCH_HEAD

# Check out the v$(ONNXRT_VERSION) tag inside the onnxruntime/ checkout, then
# discard local modifications (git reset --hard), delete untracked and ignored
# files (git clean -xdf), and sync the submodules to the checked-out tag.
# Declared phony: this is a command, not a file, and must run even if a file
# named onnxrt_checkout happens to exist.
.PHONY: onnxrt_checkout
onnxrt_checkout:
	cd onnxruntime && \
	git checkout v$(ONNXRT_VERSION) && \
	git reset --hard && \
	git clean -xdf && \
	git submodule update --init --recursive
10 changes: 10 additions & 0 deletions environments/onnxruntime/pinoak-rocm-6.1.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Start from an empty module environment
module purge

# ROCm release this environment targets
export ROCM_VERSION=6.1.2

module load PrgEnv-gnu "rocm/${ROCM_VERSION}" libffi libsqlite3 cmake cray-python

# ROCM_PATH is set explicitly here to a fixed install prefix for this release
export ROCM_PATH="/global/opt/rocm-${ROCM_VERSION}"


4 changes: 2 additions & 2 deletions environments/pytorch/pinoak-cuda-11.8.0
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module purge

module load PrgEnv-gnu cudatoolkit/11.8.0 cudnn/8.9.7.29 gcc/11.2.0 ninja libffi libsqlite3 cray-python
export CC=gcc CXX=g++ FC=gfortran
module load PrgEnv-gnu cudatoolkit/11.8.0 cudnn/8.9.7.29 gcc/11.2.0 ninja cray-python
export CC=gcc CXX=g++ FC=gfortran
4 changes: 2 additions & 2 deletions environments/pytorch/pinoak-cuda-12.5.0
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module purge

module load PrgEnv-gnu cudatoolkit/12.5.0 cudnn/cuda-12/9.3.0.75 gcc/11.2.0 ninja libffi libsqlite3 cray-python
module load PrgEnv-gnu cudatoolkit/12.5.0 cudnn/cuda-12/9.3.0.75 gcc/11.2.0 ninja cray-python
export TORCH_CUDA_ARCH_LIST="5.0 5.1 5.3 6.0 6.1 6.2 7.0 7.2 7.5 8.0 8.6 8.7 8.9 9.0"
export CC=gcc CXX=g++ FC=gfortran
export CC=gcc CXX=g++ FC=gfortran
2 changes: 1 addition & 1 deletion environments/pytorch/pinoak-rocm-5.7.0
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
module purge

export ROCM_VERSION=5.7.0
module load PrgEnv-gnu rocm/5.7.0 ninja libffi libsqlite3 cray-python
module load PrgEnv-gnu rocm/$ROCM_VERSION ninja cray-python
export CC=gcc CXX=g++ FC=gfortran

# Following come from hipconfig, not all variables set by ROCm module
Expand Down
14 changes: 14 additions & 0 deletions environments/pytorch/pinoak-rocm-6.1.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Start from an empty module environment
module purge

# ROCm release this environment targets
export ROCM_VERSION=6.1.2
module load PrgEnv-gnu "rocm/${ROCM_VERSION}" ninja cray-python

# Use the GNU compilers for C, C++ and Fortran
export CC=gcc
export CXX=g++
export FC=gfortran

# Ask hipconfig for the ROCm and HIP locations; the source and library paths
# are derived from the ROCm root it reports
export ROCM_PATH="$(hipconfig --rocmpath)"
export ROCM_SOURCE_PATH="${ROCM_PATH}"
export HIP_PATH="$(hipconfig --path)"
export HIP_LIB_PATH="${ROCM_PATH}/lib"

# Build PyTorch for the gfx90a GPU architecture only
export PYTORCH_ROCM_ARCH="gfx90a"

0 comments on commit f8255f9

Please sign in to comment.