From 9bcbd22262f44ee28d1b9b1a77be790e91b56dcd Mon Sep 17 00:00:00 2001
From: jakki-amd
Date: Thu, 19 Dec 2024 21:40:44 +0200
Subject: [PATCH] 740 add generic support for different gpu hardware (#3371)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add AMD backend support

* Add AMD frontend support

* Add Dockerfile.rocm

Co-authored-by: Samu Tamminen

* Add AMD documentation

* Fix null pointer bug with populateAccelerators trying to get null AppleUtil GPU env value

* Fix formatting

---------

Co-authored-by: Rony Leppänen
Co-authored-by: Anders Smedegaard Pedersen
Co-authored-by: Samu Tamminen
---
 .gitignore | 6 +
 CONTRIBUTING.md | 43 +--
 README.md | 10 +-
 docker/Dockerfile.rocm | 320 ++++++++++++++++++
 docker/README.md | 1 +
 docs/contents.rst | 8 +-
 docs/hardware_support/amd_support.md | 81 +++++
 .../apple_silicon_support.md | 34 +-
 docs/hardware_support/hardware_support.rst | 8 +
 docs/{ => hardware_support}/linux_aarch64.md | 0
 docs/{ => hardware_support}/nvidia_mps.md | 0
 frontend/build.gradle | 4 +-
 .../org/pytorch/serve/device/Accelerator.java | 90 +++++
 .../serve/device/AcceleratorVendor.java | 9 +
 .../org/pytorch/serve/device/SystemInfo.java | 173 ++++++++++
 .../interfaces/IAcceleratorUtility.java | 226 +++++++++++++
 .../device/interfaces/ICsvSmiParser.java | 65 ++++
 .../device/interfaces/IJsonSmiParser.java | 39 +++
 .../pytorch/serve/device/utils/AppleUtil.java | 109 ++++++
 .../pytorch/serve/device/utils/CudaUtil.java | 66 ++++
 .../pytorch/serve/device/utils/ROCmUtil.java | 118 +++++++
 .../pytorch/serve/device/utils/XpuUtil.java | 90 +++++
 .../java/org/pytorch/serve/util/ApiUtils.java | 3 +-
 .../org/pytorch/serve/util/ConfigManager.java | 89 +----
 .../pytorch/serve/wlm/WorkerLifeCycle.java | 4 +-
 .../org/pytorch/serve/wlm/WorkerThread.java | 54 +--
 .../org/pytorch/serve/ModelServerTest.java | 52 ++-
 .../pytorch/serve/device/AcceleratorTest.java | 76 +++++
 .../pytorch/serve/device/SystemInfoTest.java | 47 +++
 .../serve/device/utils/AppleUtilTest.java | 121 +++++++
 .../serve/device/utils/CudaUtilTest.java | 132 ++++++++
 .../serve/device/utils/ROCmUtilTest.java | 143 ++++++++
 .../serve/device/utils/XpuUtilTest.java | 138 ++++++++
 .../metrics/sample_amd_discovery.json | 26 ++
 .../resources/metrics/sample_amd_metrics.json | 46 +++
 .../metrics/sample_amd_updated_metrics.json | 46 +++
 .../resources/metrics/sample_apple_smi.json | 33 ++
 frontend/server/testng.xml | 6 +-
 kubernetes/kserve/tests/scripts/test_mnist.sh | 52 ++-
 requirements/common_gpu.txt | 2 -
 requirements/torch_rocm60.txt | 5 +
 requirements/torch_rocm61.txt | 4 +
 requirements/torch_rocm62.txt | 4 +
 ts/metrics/metric_collector.py | 13 +-
 ts/metrics/system_metrics.py | 80 +++--
 ts/torch_handler/base_handler.py | 8 +-
 ts_scripts/install_dependencies.py | 49 ++-
 ts_scripts/install_utils | 37 +-
 ts_scripts/print_env_info.py | 147 ++++----
 ts_scripts/sanity_utils.py | 34 +-
 ts_scripts/utils.py | 12 +-
 ts_scripts/validate_model_on_gpu.py | 11 +-
 52 files changed, 2609 insertions(+), 365 deletions(-)
 create mode 100644 docker/Dockerfile.rocm
 create mode 100644 docs/hardware_support/amd_support.md
 rename docs/{ => hardware_support}/apple_silicon_support.md (90%)
 create mode 100644 docs/hardware_support/hardware_support.rst
 rename docs/{ => hardware_support}/linux_aarch64.md (100%)
 rename docs/{ => hardware_support}/nvidia_mps.md (100%)
 create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java
 create mode 100644
frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/SystemInfo.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IAcceleratorUtility.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/interfaces/ICsvSmiParser.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IJsonSmiParser.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/utils/CudaUtil.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/utils/ROCmUtil.java create mode 100644 frontend/server/src/main/java/org/pytorch/serve/device/utils/XpuUtil.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/AcceleratorTest.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/SystemInfoTest.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/utils/CudaUtilTest.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/utils/ROCmUtilTest.java create mode 100644 frontend/server/src/test/java/org/pytorch/serve/device/utils/XpuUtilTest.java create mode 100644 frontend/server/src/test/resources/metrics/sample_amd_discovery.json create mode 100644 frontend/server/src/test/resources/metrics/sample_amd_metrics.json create mode 100644 frontend/server/src/test/resources/metrics/sample_amd_updated_metrics.json create mode 100644 frontend/server/src/test/resources/metrics/sample_apple_smi.json delete mode 100644 requirements/common_gpu.txt create mode 100644 requirements/torch_rocm60.txt create mode 100644 requirements/torch_rocm61.txt create mode 100644 requirements/torch_rocm62.txt diff --git a/.gitignore b/.gitignore index c565bcb504..08d805f470 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,9 @@ instances.yaml.backup # cpp cpp/_build cpp/third-party + +# projects +.tool-versions +**/*/.classpath +**/*/.settings +**/*/.project diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a25e754761..952bb1fb5b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,18 +11,7 @@ Your contributions will fall into two categories: - Search for your issue here: https://github.com/pytorch/serve/issues (look for the "good first issue" tag if you're a first time contributor) - Pick an issue and comment on the task that you want to work on this feature. - To ensure your changes doesn't break any of the existing features run the sanity suite as follows from serve directory: - - Install dependencies (if not already installed) - For CPU - - ```bash - python ts_scripts/install_dependencies.py --environment=dev - ``` - - For GPU - ```bash - python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121 - ``` - > Supported cuda versions as cu121, cu118, cu117, cu116, cu113, cu111, cu102, cu101, cu92 + - [Install dependencies](#Install-TorchServe-for-development) (if not already installed) - Install `pre-commit` to your Git flow: ```bash pre-commit install @@ -60,26 +49,30 @@ pytest -k test/pytest/test_mnist_template.py If you plan to develop with TorchServe and change some source code, you must install it from source code. 
-Ensure that you have `python3` installed, and the user has access to the site-packages or `~/.local/bin` is added to the `PATH` environment variable. +1. Clone the repository, including third-party modules, with `git clone --recurse-submodules --remote-submodules git@github.com:pytorch/serve.git` +2. Ensure that you have `python3` installed, and the user has access to the site-packages or `~/.local/bin` is added to the `PATH` environment variable. +3. Run the following script from the top of the source directory. NOTE: This script force re-installs `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found -Run the following script from the top of the source directory. + #### For Debian Based Systems/MacOS -NOTE: This script force re-installs `torchserve`, `torch-model-archiver` and `torch-workflow-archiver` if existing installations are found + ``` + python ./ts_scripts/install_dependencies.py --environment=dev + python ./ts_scripts/install_from_src.py --environment=dev + ``` + ##### Installing Dependencies for Accelerator Support + Use the optional `--rocm` or `--cuda` flag with `install_dependencies.py` for installing accelerator specific dependencies. -#### For Debian Based Systems/ MacOS - -``` -python ./ts_scripts/install_dependencies.py --environment=dev -python ./ts_scripts/install_from_src.py --environment=dev -``` + Possible values are + - rocm: `rocm61`, `rocm60` + - cuda: `cu111`, `cu102`, `cu101`, `cu92` -Use `--cuda` flag with `install_dependencies.py` for installing cuda version specific dependencies. Possible values are `cu111`, `cu102`, `cu101`, `cu92` + For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm61` -#### For Windows + #### For Windows -Refer to the documentation [here](docs/torchserve_on_win_native.md). + Refer to the documentation [here](docs/torchserve_on_win_native.md). -For information about the model archiver, see [detailed documentation](model-archiver/README.md). + For information about the model archiver, see [detailed documentation](model-archiver/README.md). ### What to Contribute? diff --git a/README.md b/README.md index a74b952708..200dcc5269 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,10 @@ curl http://127.0.0.1:8080/predictions/bert -T input.txt ```bash # Install dependencies -# cuda is optional +python ./ts_scripts/install_dependencies.py + +# Include dependencies for accelerator support with the relevant optional flags +python ./ts_scripts/install_dependencies.py --rocm=rocm61 python ./ts_scripts/install_dependencies.py --cuda=cu121 # Latest release @@ -36,7 +39,10 @@ pip install torchserve-nightly torch-model-archiver-nightly torch-workflow-archi ```bash # Install dependencies -# cuda is optional +python ./ts_scripts/install_dependencies.py + +# Include depeendencies for accelerator support with the relevant optional flags +python ./ts_scripts/install_dependencies.py --rocm=rocm61 python ./ts_scripts/install_dependencies.py --cuda=cu121 # Latest release diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm new file mode 100644 index 0000000000..a6f578ecb4 --- /dev/null +++ b/docker/Dockerfile.rocm @@ -0,0 +1,320 @@ +# syntax = docker/dockerfile:experimental +# +# This file can build images for cpu and gpu env. By default it builds image for CPU. 
+# Use following option to build image for cuda/GPU: --build-arg BASE_IMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 +# Here is complete command for GPU/cuda - +# $ DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE=nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04 -t torchserve:latest . +# +# Following comments have been shamelessly copied from https://github.com/pytorch/pytorch/blob/master/Dockerfile +# +# NOTE: To build this you will need a docker version > 18.06 with +# experimental enabled and DOCKER_BUILDKIT=1 +# +# If you do not use buildkit you are not going to have a good time +# +# For reference: +# https://docs.docker.com/develop/develop-images/build_enhancements/ + +ARG BASE_IMAGE=ubuntu:24.04 +ARG BRANCH_NAME=master +# Note: +# Define here the default python version to be used in all later build-stages as default. +# ARG and ENV variables do not persist across stages (they're build-stage scoped). +# That is crucial for ARG PYTHON_VERSION, which otherwise becomes "" leading to nasty bugs, +# that don't let the build fail, but break current version handling logic and result +# in images with wrong python version. To fix that, we will restate the ARG PYTHON_VERSION +# on each build-stage. +ARG PYTHON_VERSION=3.11 + +FROM ${BASE_IMAGE} AS compile-image +ARG BASE_IMAGE=ubuntu:24.04 +ARG PYTHON_VERSION +ARG BUILD_NIGHTLY +ARG BUILD_FROM_SRC +ARG LOCAL_CHANGES +ARG BRANCH_NAME +ARG REPO_URL=https://github.com/pytorch/serve.git +ENV PYTHONUNBUFFERED TRUE + +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install software-properties-common -y && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + ca-certificates \ + g++ \ + python3-setuptools \ + python$PYTHON_VERSION \ + python$PYTHON_VERSION-dev \ + python$PYTHON_VERSION-venv \ + openjdk-17-jdk \ + curl \ + wget \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Make the virtual environment and "activating" it by adding it first to the path. 
+# From here on the python$PYTHON_VERSION interpreter is used and the packages +# are installed in /home/venv which is what we need for the "runtime-image" +RUN python$PYTHON_VERSION -m venv /home/venv +ENV PATH="/home/venv/bin:$PATH" + +ARG USE_ROCM_VERSION="" + +COPY ./ serve + +RUN \ + if echo "$LOCAL_CHANGES" | grep -q "false"; then \ + rm -rf /serve;\ + git clone --recursive $REPO_URL -b $BRANCH_NAME /serve; \ + fi + +WORKDIR "/serve" + +RUN cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh + +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + apt-get update \ + && wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \ + && apt-get update \ + && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \ + else \ + echo "Skip ROCm installation"; \ + fi + +RUN \ + # Install ROCm version specific binary when ROCm version is specified as a build arg + if [ "$USE_ROCM_VERSION" ]; then \ + python$PYTHON_VERSION ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION; \ + # Install the binary with the latest CPU image on a ROCm base image + else \ + python$PYTHON_VERSION ./ts_scripts/install_dependencies.py;\ + fi; + +# Make sure latest version of torchserve is uploaded before running this +RUN \ + if echo "$BUILD_FROM_SRC" | grep -q "true"; then \ + python$PYTHON_VERSION -m pip install -r requirements/developer.txt \ + && python$PYTHON_VERSION ts_scripts/install_from_src.py;\ + elif echo "$BUILD_NIGHTLY" | grep -q "false"; then \ + python$PYTHON_VERSION -m pip install --no-cache-dir torchserve torch-model-archiver torch-workflow-archiver;\ + else \ + python$PYTHON_VERSION -m pip install --no-cache-dir torchserve-nightly torch-model-archiver-nightly torch-workflow-archiver-nightly;\ + fi + +# Final image for production +FROM ${BASE_IMAGE} AS production-image +# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top) +ARG PYTHON_VERSION +ENV PYTHONUNBUFFERED TRUE +ARG USE_ROCM_VERSION + +RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install software-properties-common -y && \ + add-apt-repository ppa:deadsnakes/ppa -y && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + python$PYTHON_VERSION \ + python3-setuptools \ + python$PYTHON_VERSION-dev \ + python$PYTHON_VERSION-venv \ + # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905 + openjdk-17-jdk \ + build-essential \ + wget \ + && rm -rf /var/lib/apt/lists/* \ + && cd /tmp + +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + apt-get update \ + && wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \ + && apt-get update \ + && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \ + else \ + echo "Skip ROCm installation"; \ + fi + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp + +COPY --chown=model-server --from=compile-image /home/venv /home/venv +COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh 
/usr/local/bin/dockerd-entrypoint.sh +ENV PATH="/home/venv/bin:$PATH" + +RUN \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + python$PYTHON_VERSION -m pip install -U pip setuptools \ + && python -m pip install /opt/rocm/share/amd_smi; \ + fi + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \ + && chown -R model-server /home/model-server + +COPY docker/config.properties /home/model-server/config.properties +RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store + +EXPOSE 8080 8081 8082 7070 7071 + +USER model-server +WORKDIR /home/model-server +ENV TEMP=/home/model-server/tmp +ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] +CMD ["serve"] + +# Final image for docker regression +FROM ${BASE_IMAGE} AS ci-image +# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top) +ARG PYTHON_VERSION +ARG BRANCH_NAME +ARG USE_ROCM_VERSION +ENV PYTHONUNBUFFERED TRUE + +RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install software-properties-common -y && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + python$PYTHON_VERSION \ + python3-setuptools \ + python$PYTHON_VERSION-dev \ + python$PYTHON_VERSION-venv \ + # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905 + openjdk-17-jdk \ + build-essential \ + wget \ + numactl \ + nodejs \ + npm \ + zip \ + unzip \ + && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \ + && rm -rf /var/lib/apt/lists/* \ + && cd /tmp + +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + apt-get update \ + && wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \ + && apt-get update \ + && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \ + else \ + echo "Skip ROCm installation"; \ + fi + +COPY --from=compile-image /home/venv /home/venv + +ENV PATH="/home/venv/bin:$PATH" + +RUN \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + python$PYTHON_VERSION -m pip install -U pip setuptools \ + && python -m pip install /opt/rocm/share/amd_smi; \ + fi + +RUN python$PYTHON_VERSION -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt + +RUN mkdir /serve +ENV TS_RUN_IN_DOCKER True + +WORKDIR /serve +CMD ["python", "test/regression_tests.py"] + +#Final image for developer Docker image +FROM ${BASE_IMAGE} as dev-image +# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top) +ARG PYTHON_VERSION +ARG BRANCH_NAME +ARG BUILD_FROM_SRC +ARG USE_ROCM_VERSION +ARG LOCAL_CHANGES +ARG BUILD_WITH_IPEX +ARG IPEX_VERSION=1.11.0 +ARG IPEX_URL=https://software.intel.com/ipex-whl-stable +ENV PYTHONUNBUFFERED TRUE +RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \ + apt-get update && \ + apt-get upgrade -y && \ + apt-get install software-properties-common -y && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + fakeroot \ + ca-certificates \ + dpkg-dev \ + sudo \ + g++ \ + git \ + python$PYTHON_VERSION \ + 
python$PYTHON_VERSION-dev \ + python3-setuptools \ + python$PYTHON_VERSION-venv \ + # using openjdk-17-jdk due to circular dependency(ca-certificates) bug in openjdk-17-jre-headless debian package + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1009905 + openjdk-17-jdk \ + build-essential \ + wget \ + curl \ + vim \ + numactl \ + nodejs \ + npm \ + zip \ + unzip \ + && npm install -g newman@5.3.2 newman-reporter-htmlextra markdown-link-check \ + && rm -rf /var/lib/apt/lists/* + +RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + apt-get update \ + && wget https://repo.radeon.com/amdgpu-install/6.2.2/ubuntu/noble/amdgpu-install_6.2.60202-1_all.deb \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y ./amdgpu-install_6.2.60202-1_all.deb \ + && apt-get update \ + && apt-get install --no-install-recommends -y amdgpu-dkms rocm; \ + else \ + echo "Skip ROCm installation"; \ + fi + +COPY ./ serve + +RUN \ + if echo "$LOCAL_CHANGES" | grep -q "false"; then \ + rm -rf /serve;\ + git clone --recursive $REPO_URL -b $BRANCH_NAME /serve; \ + fi + +COPY --from=compile-image /home/venv /home/venv +ENV PATH="/home/venv/bin:$PATH" + +RUN \ + if [ -n "$USE_ROCM_VERSION" ]; then \ + python$PYTHON_VERSION -m pip install -U pip setuptools \ + && python -m pip install /opt/rocm/share/amd_smi; \ + fi + +WORKDIR "serve" + +RUN python$PYTHON_VERSION -m pip install -U pip setuptools \ + && python$PYTHON_VERSION -m pip install --no-cache-dir -r requirements/developer.txt \ + && python$PYTHON_VERSION ts_scripts/install_from_src.py --environment=dev \ + && useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \ + && chmod +x /usr/local/bin/dockerd-entrypoint.sh \ + && chown -R model-server /home/model-server \ + && cp docker/config.properties /home/model-server/config.properties \ + && mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store \ + && chown -R model-server /home/venv +EXPOSE 8080 8081 8082 7070 7071 +WORKDIR /home/model-server +ENV TEMP=/home/model-server/tmp +ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"] +CMD ["serve"] diff --git a/docker/README.md b/docker/README.md index beb0604e10..9e5ca8a229 100644 --- a/docker/README.md +++ b/docker/README.md @@ -164,6 +164,7 @@ Creates a docker image with `torchserve` and `torch-model-archiver` installed fr ./build_image.sh -bt dev -g [-cv cu121|cu118] -cpp ``` +- For ROCm support (*experimental*), refer to [this documentation](../docs/hardware_support/amd_support.md). ## Start a container with a TorchServe image diff --git a/docs/contents.rst b/docs/contents.rst index 1ba7e83e32..c42a6a3076 100644 --- a/docs/contents.rst +++ b/docs/contents.rst @@ -16,9 +16,7 @@ model_zoo request_envelopes server - nvidia_mps snapshot - intel_extension_for_pytorch torchserve_on_win_native torchserve_on_wsl use_cases @@ -27,6 +25,12 @@ Security FAQs +.. toctree:: + :maxdepth: 0 + :caption: Hardware Support: + + hardware_support/hardware_support + .. 
toctree::
   :maxdepth: 0
   :caption: Service APIs:

diff --git a/docs/hardware_support/amd_support.md b/docs/hardware_support/amd_support.md
new file mode 100644
index 0000000000..55de40f6d4
--- /dev/null
+++ b/docs/hardware_support/amd_support.md
@@ -0,0 +1,81 @@
+# AMD Support
+
+TorchServe can be run on any combination of operating system and device that is
+[supported by ROCm](https://rocm.docs.amd.com/projects/radeon/en/latest/docs/compatibility.html).
+
+## Supported Versions of ROCm
+
+The current stable `major.patch` version of ROCm and the previous patch version will be supported. For example, versions `N.2` and `N.1`, where `N` is the current major version.
+
+## Installation
+
+ - Make sure you have **python >= 3.8 installed** on your system.
+ - clone the repo
+   ```bash
+   git clone git@github.com:pytorch/serve.git
+   ```
+
+ - cd into the cloned folder
+
+   ```bash
+   cd serve
+   ```
+
+ - create a virtual environment for python
+
+   ```bash
+   python -m venv venv
+   ```
+
+ - activate the virtual environment. If you use another shell (fish, csh, powershell) use the relevant activation script from `/venv/bin/`
+   ```bash
+   source venv/bin/activate
+   ```
+
+ - install the dependencies needed for ROCm support.
+
+   ```bash
+   python ./ts_scripts/install_dependencies.py --rocm=rocm61
+   python ./ts_scripts/install_from_src.py
+   ```
+ - enable amd-smi in the python virtual environment
+   ```bash
+   sudo chown -R $USER:$USER /opt/rocm/share/amd_smi/
+   pip install -e /opt/rocm/share/amd_smi/
+   ```
+
+### Selecting Accelerators Using `HIP_VISIBLE_DEVICES`
+
+If you have multiple accelerators on the system where you are running TorchServe, you can select which accelerators should be visible to TorchServe
+by setting the environment variable `HIP_VISIBLE_DEVICES` to a string of 0-indexed, comma-separated integers representing the ids of the accelerators.
+
+If you have 8 accelerators but only want TorchServe to see the last four of them, run `export HIP_VISIBLE_DEVICES=4,5,6,7`.
+
+>ℹ️ **Not setting** `HIP_VISIBLE_DEVICES` will cause TorchServe to use all available accelerators on the system it is running on.
+
+> ⚠️ You can run into trouble if you set `HIP_VISIBLE_DEVICES` to an empty string.
+> eg. `export HIP_VISIBLE_DEVICES=` or `export HIP_VISIBLE_DEVICES=""`
+> use `unset HIP_VISIBLE_DEVICES` if you want to remove its effect.
+
+> ⚠️ Setting both `CUDA_VISIBLE_DEVICES` and `HIP_VISIBLE_DEVICES` may cause unintended behaviour and should be avoided.
+> Doing so may cause an exception in the future.
+
+## Docker
+
+**In Development**
+
+`Dockerfile.rocm` provides preliminary ROCm support for TorchServe.
+
+Building and running `dev-image`:
+
+```bash
+docker build --file docker/Dockerfile.rocm --target dev-image -t torch-serve-dev-image-rocm --build-arg USE_ROCM_VERSION=rocm62 --build-arg BUILD_FROM_SRC=true .
+
+docker run -it --rm --device=/dev/kfd --device=/dev/dri torch-serve-dev-image-rocm bash
+```
+
+## Example Usage
+
+After installing TorchServe with the required dependencies for ROCm, you should be ready to serve your model.
+
+For a simple example, refer to `serve/examples/image_classifier/mnist/`.
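As an editorial illustration (not part of the diff above), a minimal sketch of serving that bundled MNIST example on a ROCm machine, once the installation steps have been completed, might look like the following. The archive name `mnist`, the file names under `examples/image_classifier/mnist/`, and the `model_store` directory are assumptions based on the existing example layout, not on this patch:

```bash
# Select the AMD accelerator(s) TorchServe may use (optional, see HIP_VISIBLE_DEVICES above).
export HIP_VISIBLE_DEVICES=0

# Package the example MNIST model into a model archive.
mkdir -p model_store
torch-model-archiver --model-name mnist --version 1.0 \
    --model-file examples/image_classifier/mnist/mnist.py \
    --serialized-file examples/image_classifier/mnist/mnist_cnn.pt \
    --handler examples/image_classifier/mnist/mnist_handler.py \
    --export-path model_store

# Start TorchServe, run a test prediction, then stop the server.
torchserve --start --ncs --model-store model_store --models mnist=mnist.mar
curl http://127.0.0.1:8080/predictions/mnist -T examples/image_classifier/mnist/test_data/0.png
torchserve --stop
```

If ROCm and the amd-smi bindings are installed as described above, the worker should be placed on the visible AMD device automatically, much as it would be on a CUDA machine.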
diff --git a/docs/apple_silicon_support.md b/docs/hardware_support/apple_silicon_support.md similarity index 90% rename from docs/apple_silicon_support.md rename to docs/hardware_support/apple_silicon_support.md index facd8a7f28..6e0f479b8a 100644 --- a/docs/apple_silicon_support.md +++ b/docs/hardware_support/apple_silicon_support.md @@ -1,19 +1,19 @@ -# Apple Silicon Support +# Apple Silicon Support -## What is supported +## What is supported * TorchServe CI jobs now include M1 hardware in order to ensure support, [documentation](https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories) on github M1 hardware. - - [Regression Tests](https://github.com/pytorch/serve/blob/master/.github/workflows/regression_tests_cpu.yml) - - [Regression binaries Test](https://github.com/pytorch/serve/blob/master/.github/workflows/regression_tests_cpu_binaries.yml) + - [Regression Tests](https://github.com/pytorch/serve/blob/master/.github/workflows/regression_tests_cpu.yml) + - [Regression binaries Test](https://github.com/pytorch/serve/blob/master/.github/workflows/regression_tests_cpu_binaries.yml) * For [Docker](https://docs.docker.com/desktop/install/mac-install/) ensure Docker for Apple silicon is installed then follow [setup steps](https://github.com/pytorch/serve/tree/master/docker) ## Experimental Support -* For GPU jobs on Apple Silicon, [MPS](https://pytorch.org/docs/master/notes/mps.html) is now auto detected and enabled. To prevent TorchServe from using MPS, users have to set `deviceType: "cpu"` in model-config.yaml. - * This is an experimental feature and NOT ALL models are guaranteed to work. +* For GPU jobs on Apple Silicon, [MPS](https://pytorch.org/docs/master/notes/mps.html) is now auto detected and enabled. To prevent TorchServe from using MPS, users have to set `deviceType: "cpu"` in model-config.yaml. + * This is an experimental feature and NOT ALL models are guaranteed to work. * Number of GPUs now reports GPUs on Apple Silicon -### Testing -* [Pytests](https://github.com/pytorch/serve/tree/master/test/pytest/test_device_config.py) that checks for MPS on MacOS M1 devices +### Testing +* [Pytests](https://github.com/pytorch/serve/tree/master/test/pytest/test_device_config.py) that checks for MPS on MacOS M1 devices * Models that have been tested and work: Resnet-18, Densenet161, Alexnet * Models that have been tested and DO NOT work: MNIST @@ -31,10 +31,10 @@ Config file: N/A Inference address: http://127.0.0.1:8080 Management address: http://127.0.0.1:8081 Metrics address: http://127.0.0.1:8082 -Model Store: +Model Store: Initial Models: resnet-18=resnet-18.mar -Log dir: -Metrics dir: +Log dir: +Metrics dir: Netty threads: 0 Netty client threads: 0 Default workers per model: 16 @@ -48,7 +48,7 @@ Custom python dependency for model allowed: false Enable metrics API: true Metrics mode: LOG Disable system metrics: false -Workflow Store: +Workflow Store: CPP log config: N/A Model config: N/A 024-04-08T14:18:02,380 [INFO ] main org.pytorch.serve.servingsdk.impl.PluginsManager - Loading snapshot serializer plugin... @@ -69,17 +69,17 @@ serve % curl http://127.0.0.1:8080/predictions/resnet-18 -T ./examples/image_cla } ... 
``` -#### Conda Example +#### Conda Example ``` -(myenv) serve % pip list | grep torch +(myenv) serve % pip list | grep torch torch 2.2.1 torchaudio 2.2.1 torchdata 0.7.1 torchtext 0.17.1 torchvision 0.17.1 (myenv3) serve % conda install -c pytorch-nightly torchserve torch-model-archiver torch-workflow-archiver -(myenv3) serve % pip list | grep torch +(myenv3) serve % pip list | grep torch torch 2.2.1 torch-model-archiver 0.10.0b20240312 torch-workflow-archiver 0.2.12b20240312 @@ -119,11 +119,11 @@ System metrics command: default 2024-03-12T15:58:54,702 [DEBUG] main org.pytorch.serve.wlm.ModelManager - updateModel: densenet161, count: 10 Model server started. ... -(myenv3) serve % curl http://127.0.0.1:8080/predictions/densenet161 -T examples/image_classifier/kitten.jpg +(myenv3) serve % curl http://127.0.0.1:8080/predictions/densenet161 -T examples/image_classifier/kitten.jpg { "tabby": 0.46661922335624695, "tiger_cat": 0.46449029445648193, "Egyptian_cat": 0.0661405548453331, "lynx": 0.001292439759708941, "plastic_bag": 0.00022909720428287983 -} \ No newline at end of file +} diff --git a/docs/hardware_support/hardware_support.rst b/docs/hardware_support/hardware_support.rst new file mode 100644 index 0000000000..267525fc65 --- /dev/null +++ b/docs/hardware_support/hardware_support.rst @@ -0,0 +1,8 @@ +.. toctree:: + :caption: Hardware Support: + + amd_support + apple_silicon_support + linux_aarch64 + nvidia_mps + Intel Extension for PyTorch diff --git a/docs/linux_aarch64.md b/docs/hardware_support/linux_aarch64.md similarity index 100% rename from docs/linux_aarch64.md rename to docs/hardware_support/linux_aarch64.md diff --git a/docs/nvidia_mps.md b/docs/hardware_support/nvidia_mps.md similarity index 100% rename from docs/nvidia_mps.md rename to docs/hardware_support/nvidia_mps.md diff --git a/frontend/build.gradle b/frontend/build.gradle index 33920df5a3..cb5143dc11 100644 --- a/frontend/build.gradle +++ b/frontend/build.gradle @@ -37,8 +37,8 @@ def javaProjects() { configure(javaProjects()) { apply plugin: 'java-library' - sourceCompatibility = 1.8 - targetCompatibility = 1.8 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 defaultTasks 'jar' diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java b/frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java new file mode 100644 index 0000000000..4692653ccf --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/Accelerator.java @@ -0,0 +1,90 @@ +package org.pytorch.serve.device; + +import java.text.MessageFormat; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; + +public class Accelerator { + public final Integer id; + public final AcceleratorVendor vendor; + public final String model; + public IAcceleratorUtility acceleratorUtility; + public Float usagePercentage; + public Float memoryUtilizationPercentage; + public Integer memoryAvailableMegabytes; + public Integer memoryUtilizationMegabytes; + + public Accelerator(String acceleratorName, AcceleratorVendor vendor, Integer gpuId) { + this.model = acceleratorName; + this.vendor = vendor; + this.id = gpuId; + this.usagePercentage = (float) 0.0; + this.memoryUtilizationPercentage = (float) 0.0; + this.memoryAvailableMegabytes = 0; + this.memoryUtilizationMegabytes = 0; + } + + // Getters + public Integer getMemoryAvailableMegaBytes() { + return memoryAvailableMegabytes; + } + + public AcceleratorVendor getVendor() { + return vendor; + } + + public String 
getAcceleratorModel() { + return model; + } + + public Integer getAcceleratorId() { + return id; + } + + public Float getUsagePercentage() { + return usagePercentage; + } + + public Float getMemoryUtilizationPercentage() { + return memoryUtilizationPercentage; + } + + public Integer getMemoryUtilizationMegabytes() { + return memoryUtilizationMegabytes; + } + + // Setters + public void setMemoryAvailableMegaBytes(Integer memoryAvailable) { + this.memoryAvailableMegabytes = memoryAvailable; + } + + public void setUsagePercentage(Float acceleratorUtilization) { + this.usagePercentage = acceleratorUtilization; + } + + public void setMemoryUtilizationPercentage(Float memoryUtilizationPercentage) { + this.memoryUtilizationPercentage = memoryUtilizationPercentage; + } + + public void setMemoryUtilizationMegabytes(Integer memoryUtilizationMegabytes) { + this.memoryUtilizationMegabytes = memoryUtilizationMegabytes; + } + + // Other Methods + public String utilizationToString() { + final String message = + MessageFormat.format( + "gpuId::{0} utilization.gpu::{1} % utilization.memory::{2} % memory.used::{3} MiB", + id, + usagePercentage, + memoryUtilizationPercentage, + memoryUtilizationMegabytes); + + return message; + } + + public void updateDynamicAttributes(Accelerator updated) { + this.usagePercentage = updated.usagePercentage; + this.memoryUtilizationPercentage = updated.memoryUtilizationPercentage; + this.memoryUtilizationMegabytes = updated.memoryUtilizationMegabytes; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java b/frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java new file mode 100644 index 0000000000..22fd1f5d68 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/AcceleratorVendor.java @@ -0,0 +1,9 @@ +package org.pytorch.serve.device; + +public enum AcceleratorVendor { + AMD, + NVIDIA, + INTEL, + APPLE, + UNKNOWN +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/SystemInfo.java b/frontend/server/src/main/java/org/pytorch/serve/device/SystemInfo.java new file mode 100644 index 0000000000..a26f85ef93 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/SystemInfo.java @@ -0,0 +1,173 @@ +package org.pytorch.serve.device; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.pytorch.serve.device.utils.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SystemInfo { + static final Logger logger = LoggerFactory.getLogger(SystemInfo.class); + // + // Contains information about the system (physical or virtual machine) + // we are running the workload on. + // Specifically how many accelerators and info about them. 
+ // + + public AcceleratorVendor acceleratorVendor; + ArrayList accelerators; + private IAcceleratorUtility acceleratorUtil; + + public SystemInfo() { + // Detect and set the vendor of any accelerators in the system + this.acceleratorVendor = detectVendorType(); + this.accelerators = new ArrayList(); + + // If accelerators are present (vendor != UNKNOWN), + // initialize accelerator utilities + Optional.of(hasAccelerators()) + // Only proceed if hasAccelerators() returns true + .filter(Boolean::booleanValue) + // Execute this block if accelerators are present + .ifPresent( + hasAcc -> { + // Create the appropriate utility class based on vendor + this.acceleratorUtil = createAcceleratorUtility(); + // Populate the accelerators list based on environment + // variables and available devices + populateAccelerators(); + }); + + // Safely handle accelerator metrics update + Optional.ofNullable(accelerators) + // Only proceed if the accelerators list is not empty + .filter(list -> !list.isEmpty()) + // Update metrics (utilization, memory, etc.) for all accelerators if list + // exists and not empty + .ifPresent(list -> updateAcceleratorMetrics()); + } + + private IAcceleratorUtility createAcceleratorUtility() { + switch (this.acceleratorVendor) { + case AMD: + return new ROCmUtil(); + case NVIDIA: + return new CudaUtil(); + case INTEL: + return new XpuUtil(); + case APPLE: + return new AppleUtil(); + default: + return null; + } + } + + private void populateAccelerators() { + if (this.acceleratorUtil != null) { + String envVarName = this.acceleratorUtil.getGpuEnvVariableName(); + if (envVarName != null) { + String requestedAcceleratorIds = System.getenv(envVarName); + LinkedHashSet availableAcceleratorIds = + IAcceleratorUtility.parseVisibleDevicesEnv(requestedAcceleratorIds); + this.accelerators = + this.acceleratorUtil.getAvailableAccelerators(availableAcceleratorIds); + } else { + // Handle the case where envVarName is null + this.accelerators = + this.acceleratorUtil.getAvailableAccelerators(new LinkedHashSet<>()); + } + } else { + this.accelerators = new ArrayList<>(); + } + } + + boolean hasAccelerators() { + return this.acceleratorVendor != AcceleratorVendor.UNKNOWN; + } + + public Integer getNumberOfAccelerators() { + // since we instance create `accelerators` as an empty list + // in the constructor, the null check should be redundant. + // leaving it to be sure. + return (accelerators != null) ? accelerators.size() : 0; + } + + public static AcceleratorVendor detectVendorType() { + if (isCommandAvailable("rocm-smi")) { + return AcceleratorVendor.AMD; + } else if (isCommandAvailable("nvidia-smi")) { + return AcceleratorVendor.NVIDIA; + } else if (isCommandAvailable("xpu-smi")) { + return AcceleratorVendor.INTEL; + } else if (isCommandAvailable("system_profiler")) { + return AcceleratorVendor.APPLE; + } else { + return AcceleratorVendor.UNKNOWN; + } + } + + private static boolean isCommandAvailable(String command) { + String operatingSystem = System.getProperty("os.name").toLowerCase(); + String commandCheck = operatingSystem.contains("win") ? 
"where" : "which"; + ProcessBuilder processBuilder = new ProcessBuilder(commandCheck, command); + try { + Process process = processBuilder.start(); + int exitCode = process.waitFor(); + return exitCode == 0; + } catch (IOException | InterruptedException e) { + return false; + } + } + + public ArrayList getAccelerators() { + return this.accelerators; + } + + private void updateAccelerators(List updatedAccelerators) { + // Create a map of existing accelerators with ID as key + Map existingAcceleratorsMap = + this.accelerators.stream().collect(Collectors.toMap(acc -> acc.id, acc -> acc)); + + // Update existing accelerators and add new ones + this.accelerators = + updatedAccelerators.stream() + .map( + updatedAcc -> { + Accelerator existingAcc = + existingAcceleratorsMap.get(updatedAcc.id); + if (existingAcc != null) { + existingAcc.updateDynamicAttributes(updatedAcc); + return existingAcc; + } else { + return updatedAcc; + } + }) + .collect(Collectors.toCollection(ArrayList::new)); + } + + public void updateAcceleratorMetrics() { + if (this.acceleratorUtil != null) { + List updatedAccelerators = + this.acceleratorUtil.getUpdatedAcceleratorsUtilization(this.accelerators); + + updateAccelerators(updatedAccelerators); + } + } + + public AcceleratorVendor getAcceleratorVendor() { + return this.acceleratorVendor; + } + + public String getVisibleDevicesEnvName() { + if (this.accelerators.isEmpty() || this.accelerators == null) { + return null; + } + return this.accelerators.get(0).acceleratorUtility.getGpuEnvVariableName(); + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IAcceleratorUtility.java b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IAcceleratorUtility.java new file mode 100644 index 0000000000..8bbe630c47 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IAcceleratorUtility.java @@ -0,0 +1,226 @@ +package org.pytorch.serve.device.interfaces; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.io.IOUtils; +import org.pytorch.serve.device.Accelerator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Provides functionality to detect hardware devices for accelerated workloads. For example GPUs. + */ +public interface IAcceleratorUtility { + static final Logger logger = LoggerFactory.getLogger(IAcceleratorUtility.class); + + /** + * Returns the name of the environment variable used to specify visible GPU devices. + * Implementing classes should define this based on their specific requirements. + * + *

Examples are 'HIP_VISIBLE_DEVICES', 'CUDA_VISIBLE_DEVICES'
+ *
+ * @return The name of the environment variable for visible GPU devices.
+ */
+ String getGpuEnvVariableName();
+
+ /**
+ * Returns the SMI command specific to the implementing class.
+ *
+ * @return An array of strings representing the SMI command and its arguments for getting the
+ * utilization stats for the available accelerators
+ */
+ String[] getUtilizationSmiCommand();
+
+ /**
+ * Parses a string representation of visible devices into an {@code LinkedHashSet} of device
+ * identifiers.
+ *
+ *

This method processes a comma-separated list of device identifiers, typically obtained + * from an environment variable like {X}_VISIBLE_DEVICES. It performs validation and cleaning of + * the input string. + * + * @param visibleDevices A string containing comma-separated device identifiers. Can be null or + * empty. + * @return A LinkedHashSet of Integers each representing a device identifier. Returns an empty + * set if the input is null or empty. + * @throws IllegalArgumentException if the input string is not in the correct format (integers + * separated by commas, with or without spaces). + * @example // Returns [0, 1, 2] parseVisibleDevicesEnv("0,1,2") + *

// notice spaces between the commas and the next number // Returns [0, 1, 2] + * parseVisibleDevicesEnv("0, 1, 2") + *

// Returns [0, 2] parseVisibleDevicesEnv("0,0,2") + *

// Returns [] parseVisibleDevicesEnv("") + *

// Throws IllegalArgumentException parseVisibleDevicesEnv("0,1,a") + */ + static LinkedHashSet parseVisibleDevicesEnv(String visibleDevices) { + // return an empty set if null or an empty string is passed + if (visibleDevices == null || visibleDevices.isEmpty()) { + return new LinkedHashSet<>(); + } + + // Remove all spaces from the input + String cleaned = visibleDevices.replaceAll("\\s", ""); + + // Check if the cleaned string matches the pattern of integers separated by + // commas + if (!cleaned.matches("^\\d+(,\\d+)*$")) { + throw new IllegalArgumentException( + "Invalid format: The env defining visible devices must be integers separated by commas"); + } + + // split the string on comma, cast to Integer, and collect to a List + List allIntegers = + Arrays.stream(cleaned.split(",")) + .map(Integer::parseInt) + .collect(Collectors.toList()); + + // use Sets to deduplicate integers + LinkedHashSet uniqueIntegers = new LinkedHashSet<>(); + Set duplicates = + allIntegers.stream() + .filter(n -> !uniqueIntegers.add(n)) + .collect(Collectors.toSet()); + + if (!duplicates.isEmpty()) { + logger.warn( + "Duplicate GPU IDs found in {}: {}. Duplicates will be removed.", + visibleDevices, + duplicates); + } + + // return the set of unique integers + return uniqueIntegers; + } + + /** + * Parses the output of a system management interface (SMI) command to create a list of {@code + * Accelerator} objects with updated metrics. + * + * @param smiOutput The raw output string from the SMI command. + * @param parsed_gpu_ids A set of GPU IDs that have already been parsed. + * @return An {@code ArrayList} of {@code Accelerator} objects representing the parsed + * accelerators. + * @implNote The specific SMI command, output format, and environment variables will vary + * depending on the accelerator type. The SMI command should return core usage, memory + * utilization. Implementations should document these specifics in their method comments. If + * {@code parsed_gpu_ids} is empty, all accelerators found by the smi command should be + * returned. + * @throws IllegalArgumentException If the SMI output is invalid or cannot be parsed. + * @throws NullPointerException If either {@code smiOutput} or {@code parsed_gpu_ids} is null. + */ + ArrayList smiOutputToUpdatedAccelerators( + String smiOutput, LinkedHashSet parsed_gpu_ids); + + /** + * @param availableAcceleratorIds + * @return + */ + public ArrayList getAvailableAccelerators( + LinkedHashSet availableAcceleratorIds); + + /** + * Converts a number of bytes to megabytes. + * + *

This method uses the binary definition of a megabyte, where 1 MB = 1,048,576 bytes (1024 *
+ * 1024). The result is truncated to a whole number of megabytes.
+ *
+ * @param bytes The number of bytes to convert, as a long value.
+ * @return The equivalent number of megabytes, as an Integer value (truncated, not rounded).
+ */
+ static Integer bytesToMegabytes(long bytes) {
+ final double BYTES_IN_MEGABYTE = 1024 * 1024;
+ return (int) (bytes / BYTES_IN_MEGABYTE);
+ }
+
+ /**
+ * Executes an SMI (System Management Interface) command and returns its output.
+ *
+ *

This method runs the specified command using a ProcessBuilder, combines standard output + * and error streams, waits for the process to complete, and returns the output as a string. + * + * @param command An array of strings representing the SMI command and its arguments. + * @return A string containing the output of the SMI command. + * @throws AssertionError If the SMI command returns a non-zero exit code. + * @throws Error If an IOException or InterruptedException occurs during execution. The original + * exception is wrapped in the Error. + */ + static String callSMI(String[] command) { + try { + ProcessBuilder processBuilder = new ProcessBuilder(command); + processBuilder.redirectErrorStream(true); + Process process = processBuilder.start(); + int ret = process.waitFor(); + if (ret != 0) { + throw new AssertionError("SMI command returned a non-zero"); + } + + String output = IOUtils.toString(process.getInputStream(), StandardCharsets.UTF_8); + if (output.isEmpty()) { + throw new AssertionError("Unexpected smi response."); + } + return output; + + } catch (IOException | InterruptedException e) { + logger.debug("SMI command not available or failed: " + e.getMessage()); + throw new Error(e); + } + } + + /** + * Updates the utilization information for a list of accelerators. + * + *

This method retrieves the current utilization statistics for the given accelerators using + * a System Management Interface (SMI) command specific to the implementing class. It then + * parses the SMI output and returns an updated {@code ArrayList} of accelerator objects with + * the latest information. + * + * @param accelerators An ArrayList of Accelerator objects to be updated. Must not be null or + * empty. + * @return An ArrayList of updated Accelerator objects with the latest utilization information. + * @throws IllegalArgumentException If the input accelerators list is null or empty, or if the + * SMI command returned by getUtilizationSmiCommand() is null or empty. + * @throws RuntimeException If an error occurs while executing the SMI command or parsing its + * output. The specific exception will depend on the implementation of callSMI() and + * smiOutputToAccelerators(). + * @implNote This method uses getUtilizationSmiCommand() to retrieve the SMI command specific to + * the implementing class. Subclasses must implement this method to provide the correct + * command. The method also relies on callSMI() to execute the command and + * smiOutputToAccelerators() to parse the output, both of which must be implemented by the + * subclass. + * @implSpec The implementation first checks if the input is valid, then retrieves and validates + * the SMI command. It executes the command, extracts the GPU IDs from the input + * accelerators, and uses these to parse the SMI output into updated Accelerator objects. + * @see #getUtilizationSmiCommand() + * @see #callSMI(String[]) + * @see #smiOutputToUpdatedAccelerators(String, LinkedHashSet) + */ + default ArrayList getUpdatedAcceleratorsUtilization( + ArrayList accelerators) { + if (accelerators == null || accelerators.isEmpty()) { + logger.warn("No accelerators to update."); + throw new IllegalArgumentException( + "`accelerators` cannot be null or empty when trying to update the accelerator stats"); + } + + String[] smiCommand = getUtilizationSmiCommand(); + if (smiCommand == null || smiCommand.length == 0) { + throw new IllegalArgumentException( + "`smiCommand` cannot be null or empty when trying to update accelerator stats"); + } + + String smiOutput = callSMI(smiCommand); + LinkedHashSet acceleratorIds = + accelerators.stream() + .map(accelerator -> accelerator.id) + .collect(Collectors.toCollection(LinkedHashSet::new)); + ArrayList updatedAccelerators = + smiOutputToUpdatedAccelerators(smiOutput, acceleratorIds); + return updatedAccelerators; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/ICsvSmiParser.java b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/ICsvSmiParser.java new file mode 100644 index 0000000000..98a7351467 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/ICsvSmiParser.java @@ -0,0 +1,65 @@ +package org.pytorch.serve.device.interfaces; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.function.Function; +import org.pytorch.serve.device.Accelerator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public interface ICsvSmiParser { + static final Logger csvSmiParserLogger = LoggerFactory.getLogger(ICsvSmiParser.class); + + /** + * Parses CSV output from SMI commands and converts it into a list of Accelerator objects. + * + * @param csvOutput The CSV string output from an SMI command. 
+ * @param parsedGpuIds A set of accelerator IDs to consider. If empty, all accelerators
+ * are included.
+ * @param parseFunction A function that takes an array of CSV fields and returns an Accelerator
+ * object. This function should handle the specific parsing logic for different SMI command
+ * outputs.
+ * @return An ArrayList of Accelerator objects parsed from the CSV output.
+ * @throws NumberFormatException If there's an error parsing numeric fields in the CSV.
+

This method provides a general way to parse CSV output from various SMI commands. It + * skips the header line of the CSV, then applies the provided parseFunction to each + * subsequent line. Accelerators are only included if their ID is in parsedAcceleratorIds, + * or if parsedAcceleratorIds is empty (indicating all accelerators should be included). + *

The parseFunction parameter allows for flexibility in handling different CSV formats + * from various SMI commands. This function should handle the specific logic for creating an + * Accelerator object from a line of CSV data. + */ + default ArrayList csvSmiOutputToAccelerators( + final String csvOutput, + final LinkedHashSet parsedGpuIds, + Function parseFunction) { + final ArrayList accelerators = new ArrayList<>(); + + List lines = Arrays.asList(csvOutput.split("\n")); + + final boolean addAll = parsedGpuIds.isEmpty(); + + lines.stream() + .skip(1) // Skip the header line + .forEach( + line -> { + final String[] parts = line.split(","); + try { + Accelerator accelerator = parseFunction.apply(parts); + if (accelerator != null + && (addAll + || parsedGpuIds.contains( + accelerator.getAcceleratorId()))) { + accelerators.add(accelerator); + } + } catch (final NumberFormatException e) { + csvSmiParserLogger.warn( + "Failed to parse GPU ID: " + parts[1].trim(), e); + } + }); + + return accelerators; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IJsonSmiParser.java b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IJsonSmiParser.java new file mode 100644 index 0000000000..0a39ebfc91 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/interfaces/IJsonSmiParser.java @@ -0,0 +1,39 @@ +package org.pytorch.serve.device.interfaces; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import org.pytorch.serve.device.Accelerator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public interface IJsonSmiParser { + static final Logger jsonSmiParserLogger = LoggerFactory.getLogger(IJsonSmiParser.class); + + default ArrayList jsonOutputToAccelerators( + JsonElement rootObject, LinkedHashSet parsedAcceleratorIds) { + + ArrayList accelerators = new ArrayList<>(); + List acceleratorObjects = extractAccelerators(rootObject); + + for (JsonObject acceleratorObject : acceleratorObjects) { + Integer acceleratorId = extractAcceleratorId(acceleratorObject); + if (acceleratorId != null + && (parsedAcceleratorIds.isEmpty() + || parsedAcceleratorIds.contains(acceleratorId))) { + Accelerator accelerator = jsonObjectToAccelerator(acceleratorObject); + accelerators.add(accelerator); + } + } + + return accelerators; + } + + public Integer extractAcceleratorId(JsonObject jsonObject); + + public Accelerator jsonObjectToAccelerator(JsonObject jsonObject); + + public List extractAccelerators(JsonElement rootObject); +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java b/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java new file mode 100644 index 0000000000..3c32be3317 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/utils/AppleUtil.java @@ -0,0 +1,109 @@ +package org.pytorch.serve.device.utils; + +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.pytorch.serve.device.interfaces.IJsonSmiParser; + +public class AppleUtil 
implements IAcceleratorUtility, IJsonSmiParser { + + @Override + public String getGpuEnvVariableName() { + return null; // Apple doesn't use a GPU environment variable + } + + @Override + public String[] getUtilizationSmiCommand() { + return new String[] { + "system_profiler", "-json", "-detailLevel", "mini", "SPDisplaysDataType" + }; + } + + @Override + public ArrayList getAvailableAccelerators( + LinkedHashSet availableAcceleratorIds) { + String jsonOutput = IAcceleratorUtility.callSMI(getUtilizationSmiCommand()); + JsonObject rootObject = JsonParser.parseString(jsonOutput).getAsJsonObject(); + return jsonOutputToAccelerators(rootObject, availableAcceleratorIds); + } + + @Override + public ArrayList smiOutputToUpdatedAccelerators( + String smiOutput, LinkedHashSet parsedGpuIds) { + JsonObject rootObject = JsonParser.parseString(smiOutput).getAsJsonObject(); + return jsonOutputToAccelerators(rootObject, parsedGpuIds); + } + + @Override + public Accelerator jsonObjectToAccelerator(JsonObject gpuObject) { + String model = gpuObject.get("sppci_model").getAsString(); + if (!model.startsWith("Apple M")) { + return null; + } + + Accelerator accelerator = new Accelerator(model, AcceleratorVendor.APPLE, 0); + + // Set additional information + accelerator.setUsagePercentage(0f); // Not available from system_profiler + accelerator.setMemoryUtilizationPercentage(0f); // Not available from system_profiler + accelerator.setMemoryUtilizationMegabytes(0); // Not available from system_profiler + + return accelerator; + } + + @Override + public Integer extractAcceleratorId(JsonObject cardObject) { + // `system_profiler` only returns one object for + // the integrated GPU on M1, M2, M3 Macs + return 0; + } + + @Override + public List extractAccelerators(JsonElement rootObject) { + List accelerators = new ArrayList<>(); + JsonArray displaysArray = + rootObject + .getAsJsonObject() // Gets the outer object + .get("SPDisplaysDataType") // Gets the "SPDisplaysDataType" element + .getAsJsonArray(); + JsonObject gpuObject = displaysArray.get(0).getAsJsonObject(); + int number_of_cores = Integer.parseInt(gpuObject.get("sppci_cores").getAsString()); + + // add the object `number_of_cores` times to maintain the exsisitng + // functionality + accelerators = + IntStream.range(0, number_of_cores) + .mapToObj(i -> gpuObject) + .collect(Collectors.toList()); + + return accelerators; + } + + public ArrayList jsonOutputToAccelerators( + JsonObject rootObject, LinkedHashSet parsedAcceleratorIds) { + + ArrayList accelerators = new ArrayList<>(); + List acceleratorObjects = extractAccelerators(rootObject); + + for (JsonObject acceleratorObject : acceleratorObjects) { + Integer acceleratorId = extractAcceleratorId(acceleratorObject); + if (acceleratorId != null + && (parsedAcceleratorIds.isEmpty() + || parsedAcceleratorIds.contains(acceleratorId))) { + Accelerator accelerator = jsonObjectToAccelerator(acceleratorObject); + accelerators.add(accelerator); + } + } + + return accelerators; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/utils/CudaUtil.java b/frontend/server/src/main/java/org/pytorch/serve/device/utils/CudaUtil.java new file mode 100644 index 0000000000..b64faa57a4 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/utils/CudaUtil.java @@ -0,0 +1,66 @@ +package org.pytorch.serve.device.utils; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import 
org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.pytorch.serve.device.interfaces.ICsvSmiParser; + +public class CudaUtil implements IAcceleratorUtility, ICsvSmiParser { + + @Override + public String getGpuEnvVariableName() { + return "CUDA_VISIBLE_DEVICES"; + } + + @Override + public String[] getUtilizationSmiCommand() { + String metrics = + String.join( + ",", + "index", + "gpu_name", + "utilization.gpu", + "utilization.memory", + "memory.used"); + return new String[] {"nvidia-smi", "--query-gpu=" + metrics, "--format=csv,nounits"}; + } + + @Override + public ArrayList getAvailableAccelerators( + LinkedHashSet availableAcceleratorIds) { + String[] command = {"nvidia-smi", "--query-gpu=index,gpu_name", "--format=csv,nounits"}; + + String smiOutput = IAcceleratorUtility.callSMI(command); + return csvSmiOutputToAccelerators( + smiOutput, availableAcceleratorIds, this::parseAccelerator); + } + + @Override + public ArrayList smiOutputToUpdatedAccelerators( + String smiOutput, LinkedHashSet parsedGpuIds) { + + return csvSmiOutputToAccelerators(smiOutput, parsedGpuIds, this::parseUpdatedAccelerator); + } + + public Accelerator parseAccelerator(String[] parts) { + int id = Integer.parseInt(parts[0].trim()); + String model = parts[1].trim(); + return new Accelerator(model, AcceleratorVendor.NVIDIA, id); + } + + public Accelerator parseUpdatedAccelerator(String[] parts) { + int id = Integer.parseInt(parts[0].trim()); + String model = parts[1].trim(); + Float usagePercentage = Float.parseFloat(parts[2].trim()); + Float memoryUtilizationPercentage = Float.parseFloat(parts[3].trim()); + int memoryUtilizationMegabytes = Integer.parseInt(parts[4].trim()); + + Accelerator accelerator = new Accelerator(model, AcceleratorVendor.NVIDIA, id); + accelerator.setUsagePercentage(usagePercentage); + accelerator.setMemoryUtilizationPercentage(memoryUtilizationPercentage); + accelerator.setMemoryUtilizationMegabytes(memoryUtilizationMegabytes); + return accelerator; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/utils/ROCmUtil.java b/frontend/server/src/main/java/org/pytorch/serve/device/utils/ROCmUtil.java new file mode 100644 index 0000000000..0b165469f7 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/utils/ROCmUtil.java @@ -0,0 +1,118 @@ +package org.pytorch.serve.device.utils; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.pytorch.serve.device.interfaces.IJsonSmiParser; + +public class ROCmUtil implements IAcceleratorUtility, IJsonSmiParser { + private static final Pattern GPU_ID_PATTERN = Pattern.compile("card(\\d+)"); + + @Override + public String getGpuEnvVariableName() { + return "HIP_VISIBLE_DEVICES"; + } + + @Override + public String[] getUtilizationSmiCommand() { + return new String[] { + "rocm-smi", + "--showid", + "--showproductname", + "--showuse", + "--showmemuse", + "--showmeminfo", + "vram", + "-P", + "--json" + }; + } + + @Override + public ArrayList getAvailableAccelerators( + LinkedHashSet availableAcceleratorIds) { + String[] smiCommand = {"rocm-smi", "--showproductname", "-P", "--json"}; + String jsonOutput = 
IAcceleratorUtility.callSMI(smiCommand); + + JsonObject rootObject = JsonParser.parseString(jsonOutput).getAsJsonObject(); + return jsonOutputToAccelerators(rootObject, availableAcceleratorIds); + } + + @Override + public ArrayList smiOutputToUpdatedAccelerators( + String smiOutput, LinkedHashSet parsedGpuIds) { + JsonObject rootObject = JsonParser.parseString(smiOutput).getAsJsonObject(); + return jsonOutputToAccelerators(rootObject, parsedGpuIds); + } + + @Override + public List extractAccelerators(JsonElement rootObject) { + JsonObject root = rootObject.getAsJsonObject(); + List accelerators = new ArrayList<>(); + for (String key : root.keySet()) { + if (GPU_ID_PATTERN.matcher(key).matches()) { + JsonObject accelerator = root.getAsJsonObject(key); + accelerator.addProperty("cardId", key); // Add the card ID to the JsonObject + accelerators.add(accelerator); + } + } + return accelerators; + } + + @Override + public Integer extractAcceleratorId(JsonObject jsonObject) { + String cardId = jsonObject.get("cardId").getAsString(); + Matcher matcher = GPU_ID_PATTERN.matcher(cardId); + if (matcher.matches()) { + return Integer.parseInt(matcher.group(1)); + } + return null; + } + + @Override + public Accelerator jsonObjectToAccelerator(JsonObject jsonObject) { + // Check if required field exists + if (!jsonObject.has("Card Series")) { + throw new IllegalArgumentException("Missing required field: Card Series"); + } + + String model = jsonObject.get("Card Series").getAsString(); + Integer acceleratorId = extractAcceleratorId(jsonObject); + Accelerator accelerator = new Accelerator(model, AcceleratorVendor.AMD, acceleratorId); + + // Set optional fields using GSON's has() method + if (jsonObject.has("GPU use (%)")) { + accelerator.setUsagePercentage( + Float.parseFloat(jsonObject.get("GPU use (%)").getAsString())); + } + + if (jsonObject.has("GPU Memory Allocated (VRAM%)")) { + accelerator.setMemoryUtilizationPercentage( + Float.parseFloat(jsonObject.get("GPU Memory Allocated (VRAM%)").getAsString())); + } + + if (jsonObject.has("VRAM Total Memory (B)")) { + String totalMemoryStr = jsonObject.get("VRAM Total Memory (B)").getAsString().strip(); + Long totalMemoryBytes = Long.parseLong(totalMemoryStr); + accelerator.setMemoryAvailableMegaBytes( + IAcceleratorUtility.bytesToMegabytes(totalMemoryBytes)); + } + + if (jsonObject.has("VRAM Total Used Memory (B)")) { + String usedMemoryStr = jsonObject.get("VRAM Total Used Memory (B)").getAsString(); + Long usedMemoryBytes = Long.parseLong(usedMemoryStr); + accelerator.setMemoryUtilizationMegabytes( + IAcceleratorUtility.bytesToMegabytes(usedMemoryBytes)); + } + + return accelerator; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/device/utils/XpuUtil.java b/frontend/server/src/main/java/org/pytorch/serve/device/utils/XpuUtil.java new file mode 100644 index 0000000000..2ec2900035 --- /dev/null +++ b/frontend/server/src/main/java/org/pytorch/serve/device/utils/XpuUtil.java @@ -0,0 +1,90 @@ +package org.pytorch.serve.device.utils; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.pytorch.serve.device.interfaces.ICsvSmiParser; + +public class XpuUtil implements IAcceleratorUtility, ICsvSmiParser { + + @Override + public String getGpuEnvVariableName() { + return "XPU_VISIBLE_DEVICES"; + } + + @Override + public ArrayList getAvailableAccelerators( + final 
LinkedHashSet availableAcceleratorIds) { + final String[] smiCommand = { + "xpu-smi", + "discovery", + "--dump", // output as csv + String.join( + ",", + "1", // device Id + "2", // Device name + "16" // Memory physical size + ) + }; + final String smiOutput = IAcceleratorUtility.callSMI(smiCommand); + + final String acceleratorEnv = getGpuEnvVariableName(); + final String requestedAccelerators = System.getenv(acceleratorEnv); + final LinkedHashSet parsedAcceleratorIds = + IAcceleratorUtility.parseVisibleDevicesEnv(requestedAccelerators); + + return csvSmiOutputToAccelerators( + smiOutput, parsedAcceleratorIds, this::parseDiscoveryOutput); + } + + @Override + public final ArrayList smiOutputToUpdatedAccelerators( + final String smiOutput, final LinkedHashSet parsedGpuIds) { + return csvSmiOutputToAccelerators(smiOutput, parsedGpuIds, this::parseUtilizationOutput); + } + + @Override + public String[] getUtilizationSmiCommand() { + // https://intel.github.io/xpumanager/smi_user_guide.html#get-the-device-real-time-statistics + // Timestamp, DeviceId, GPU Utilization (%), GPU Memory Utilization (%) + // 06:14:46.000, 0, 0.00, 14.61 + // 06:14:47.000, 1, 0.00, 14.59 + final String[] smiCommand = { + "xpu-smi", + "dump", + "-d -1", // all devices + "-n 1", // one dump + "-m", // metrics + String.join( + ",", + "0", // GPU Utilization (%), GPU active time of the elapsed time, per tile or + // device. + // Device-level is the average value of tiles for multi-tiles. + "5" // GPU Memory Utilization (%), per tile or device. Device-level is the + // average + // value of tiles for multi-tiles. + ) + }; + + return smiCommand; + } + + private Accelerator parseDiscoveryOutput(String[] parts) { + final int acceleratorId = Integer.parseInt(parts[1].trim()); + final String deviceName = parts[2].trim(); + logger.debug("Found accelerator at index: {}, Card name: {}", acceleratorId, deviceName); + return new Accelerator(deviceName, AcceleratorVendor.INTEL, acceleratorId); + } + + private Accelerator parseUtilizationOutput(String[] parts) { + final int acceleratorId = Integer.parseInt(parts[1].trim()); + final Float usagePercentage = Float.parseFloat(parts[2]); + final Float memoryUsagePercentage = Float.parseFloat(parts[3]); + Accelerator accelerator = new Accelerator("", AcceleratorVendor.INTEL, acceleratorId); + accelerator.setUsagePercentage(usagePercentage); + accelerator.setMemoryUtilizationPercentage(memoryUsagePercentage); + return accelerator; + } +} diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java index 70f5a1c644..12a00d57d0 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ApiUtils.java @@ -374,7 +374,8 @@ public static String getWorkerStatus() { } else if ((numWorking == 0) && (numScaled > 0)) { response = "Unhealthy"; } - // TODO: Check if its OK to send other 2xx errors to ALB for "Partial Healthy" and + // TODO: Check if its OK to send other 2xx errors to ALB for "Partial Healthy" + // and // "Unhealthy" return response; } diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 791dac511c..e73b138b4c 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -5,11 +5,9 @@ import 
io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; import io.netty.handler.ssl.util.SelfSignedCertificate; -import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.lang.reflect.Field; import java.lang.reflect.Type; import java.net.InetAddress; @@ -27,7 +25,6 @@ import java.security.cert.X509Certificate; import java.security.spec.InvalidKeySpecException; import java.security.spec.PKCS8EncodedKeySpec; -import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; import java.util.Collection; @@ -46,6 +43,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.io.IOUtils; import org.pytorch.serve.archive.model.Manifest; +import org.pytorch.serve.device.SystemInfo; import org.pytorch.serve.metrics.MetricBuilder; import org.pytorch.serve.servingsdk.snapshot.SnapshotSerializer; import org.pytorch.serve.snapshot.SnapshotSerializerFactory; @@ -128,7 +126,8 @@ public final class ConfigManager { private static final String TS_DISABLE_TOKEN_AUTHORIZATION = "disable_token_authorization"; private static final String TS_ENABLE_MODEL_API = "enable_model_api"; - // Configuration which are not documented or enabled through environment variables + // Configuration which are not documented or enabled through environment + // variables private static final String USE_NATIVE_IO = "use_native_io"; private static final String IO_RATIO = "io_ratio"; private static final String METRIC_TIME_INTERVAL = "metric_time_interval"; @@ -176,10 +175,13 @@ public final class ConfigManager { private String headerKeySequenceStart; private String headerKeySequenceEnd; + public SystemInfo systemInfo; + private static final Logger logger = LoggerFactory.getLogger(ConfigManager.class); private ConfigManager(Arguments args) throws IOException { prop = new Properties(); + this.systemInfo = new SystemInfo(); this.snapshotDisabled = args.isSnapshotDisabled(); String version = readFile(getModelServerHome() + "/ts/version.txt"); @@ -271,7 +273,7 @@ private ConfigManager(Arguments args) throws IOException { TS_NUMBER_OF_GPU, String.valueOf( Integer.min( - getAvailableGpu(), + this.systemInfo.getNumberOfAccelerators(), getIntProperty(TS_NUMBER_OF_GPU, Integer.MAX_VALUE)))); String pythonExecutable = args.getPythonExecutable(); @@ -931,83 +933,6 @@ private static String getCanonicalPath(String path) { return getCanonicalPath(new File(path)); } - private static int getAvailableGpu() { - try { - - List gpuIds = new ArrayList<>(); - String visibleCuda = System.getenv("CUDA_VISIBLE_DEVICES"); - if (visibleCuda != null && !visibleCuda.isEmpty()) { - String[] ids = visibleCuda.split(","); - for (String id : ids) { - gpuIds.add(Integer.parseInt(id)); - } - } else if (System.getProperty("os.name").startsWith("Mac")) { - Process process = Runtime.getRuntime().exec("system_profiler SPDisplaysDataType"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - - BufferedReader reader = - new BufferedReader(new InputStreamReader(process.getInputStream())); - String line; - while ((line = reader.readLine()) != null) { - if (line.contains("Chipset Model:") && !line.contains("Apple M1")) { - return 0; - } - if (line.contains("Total Number of Cores:")) { - String[] parts = line.split(":"); - if (parts.length >= 2) { - return (Integer.parseInt(parts[1].trim())); - } - } - } - // No MPS devices detected - return 0; - } else { - - try { - Process process = - 
Runtime.getRuntime().exec("nvidia-smi --query-gpu=index --format=csv"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !"index".equals(list.get(0))) { - throw new AssertionError("Unexpected nvidia-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - } catch (IOException | InterruptedException e) { - System.out.println("nvidia-smi not available or failed: " + e.getMessage()); - } - try { - Process process = Runtime.getRuntime().exec("xpu-smi discovery --dump 1"); - int ret = process.waitFor(); - if (ret != 0) { - return 0; - } - List list = - IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); - if (list.isEmpty() || !list.get(0).contains("Device ID")) { - throw new AssertionError("Unexpected xpu-smi response."); - } - for (int i = 1; i < list.size(); i++) { - gpuIds.add(Integer.parseInt(list.get(i))); - } - } catch (IOException | InterruptedException e) { - logger.debug("xpu-smi not available or failed: " + e.getMessage()); - } - } - return gpuIds.size(); - } catch (IOException | InterruptedException e) { - return 0; - } - } - public List getAllowedUrls() { String allowedURL = prop.getProperty(TS_ALLOWED_URLS, DEFAULT_TS_ALLOWED_URLS); return Arrays.asList(allowedURL.split(",")); diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerLifeCycle.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerLifeCycle.java index 74b31dfd24..0b3186f099 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerLifeCycle.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerLifeCycle.java @@ -135,7 +135,9 @@ private void startWorkerPython(int port, String deviceIds) attachRunner(argl, envp, port, deviceIds); } else { if (deviceIds != null) { - envp.add("CUDA_VISIBLE_DEVICES=" + deviceIds); + String visibleDeviceEnvName = + configManager.systemInfo.getVisibleDevicesEnvName(); + envp.add(visibleDeviceEnvName + "=" + deviceIds); } argl.add(EnvironmentUtils.getPythonRunTime(model)); } diff --git a/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerThread.java b/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerThread.java index bedf5fac3e..28c1eee18c 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerThread.java +++ b/frontend/server/src/main/java/org/pytorch/serve/wlm/WorkerThread.java @@ -9,13 +9,9 @@ import io.netty.channel.ChannelPipeline; import io.netty.channel.EventLoopGroup; import io.netty.channel.SimpleChannelInboundHandler; -import java.io.BufferedReader; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.SocketAddress; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,6 +23,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; +import org.pytorch.serve.device.Accelerator; import org.pytorch.serve.job.Job; import org.pytorch.serve.job.RestJob; import org.pytorch.serve.metrics.IMetric; @@ -118,51 +115,13 @@ public WorkerState getState() { } public String getGpuUsage() { - Process process; StringBuffer gpuUsage = new StringBuffer(); if (gpuId >= 0) { try { - // TODO : add a generic code to capture gpu details for different devices instead of - // just NVIDIA 
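The WorkerLifeCycle hunk above is the launch-time half of the same refactor: the visible-devices variable name is no longer hard-coded to CUDA_VISIBLE_DEVICES but asked from SystemInfo, so the value written into the worker environment follows the detected vendor (CUDA_VISIBLE_DEVICES, HIP_VISIBLE_DEVICES or XPU_VISIBLE_DEVICES, per the getGpuEnvVariableName implementations in this patch). A rough sketch of the resulting behaviour, with the device list hard-coded purely for illustration:

    // Sketch; only getVisibleDevicesEnvName() and envp are taken from the patch itself.
    String deviceIds = "0,1";
    String envName = configManager.systemInfo.getVisibleDevicesEnvName();
    // NVIDIA host -> "CUDA_VISIBLE_DEVICES=0,1"
    // AMD host    -> "HIP_VISIBLE_DEVICES=0,1"
    // Intel host  -> "XPU_VISIBLE_DEVICES=0,1"
    envp.add(envName + "=" + deviceIds);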
- ProcessBuilder pb = - new ProcessBuilder( - "nvidia-smi", - "-i", - String.valueOf(gpuId), - "--query-gpu=utilization.gpu,utilization.memory,memory.used", - "--format=csv"); - - // Start the process - process = pb.start(); - process.waitFor(); - int exitCode = process.exitValue(); - if (exitCode != 0) { - gpuUsage.append("failed to obtained gpu usage"); - InputStream error = process.getErrorStream(); - for (int i = 0; i < error.available(); i++) { - logger.error("" + error.read()); - } - return gpuUsage.toString(); - } - InputStream stdout = process.getInputStream(); - BufferedReader reader = - new BufferedReader(new InputStreamReader(stdout, StandardCharsets.UTF_8)); - String line; - String[] headers = new String[3]; - Boolean firstLine = true; - while ((line = reader.readLine()) != null) { - if (firstLine) { - headers = line.split(","); - firstLine = false; - } else { - String[] values = line.split(","); - StringBuffer sb = new StringBuffer("gpuId::" + gpuId + " "); - for (int i = 0; i < headers.length; i++) { - sb.append(headers[i] + "::" + values[i].strip()); - } - gpuUsage.append(sb.toString()); - } - } + configManager.systemInfo.updateAcceleratorMetrics(); + Accelerator accelerator = + this.configManager.systemInfo.getAccelerators().get(gpuId); + return accelerator.utilizationToString(); } catch (Exception e) { gpuUsage.append("failed to obtained gpu usage"); logger.error("Exception Raised : " + e.toString()); @@ -333,7 +292,8 @@ public void run() { } } finally { // WorkerThread is running in thread pool, the thread will be assigned to next - // Runnable once this worker is finished. If currentThread keep holding the reference + // Runnable once this worker is finished. If currentThread keep holding the + // reference // of the thread, currentThread.interrupt() might kill next worker. 
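With the rewritten getGpuUsage above, per-worker GPU reporting no longer spawns nvidia-smi; it refreshes SystemInfo and serializes the one accelerator the worker is pinned to. Combining the calls from the patch with the format asserted in AcceleratorTest further down, the flow and its output look roughly like this (the gpuId and the numbers are illustrative):

    configManager.systemInfo.updateAcceleratorMetrics();            // re-query the vendor's SMI tool
    Accelerator acc = configManager.systemInfo.getAccelerators().get(gpuId);
    String usage = acc.utilizationToString();
    // e.g. "gpuId::2 utilization.gpu::80 % utilization.memory::70 % memory.used::5,120 MiB"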
for (int i = 0; i < backendChannel.size(); i++) { backendChannel.get(i).disconnect(); diff --git a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java index f419a26657..bd7f654ce7 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java +++ b/frontend/server/src/test/java/org/pytorch/serve/ModelServerTest.java @@ -26,7 +26,9 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.security.GeneralSecurityException; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; @@ -1340,6 +1342,24 @@ public void testErrorBatch() throws InterruptedException { alwaysRun = true, dependsOnMethods = {"testErrorBatch"}) public void testMetricManager() throws JsonParseException, InterruptedException { + final String UNIT = "Unit"; + final String LEVEL = "Level"; + final String HOST = "Host"; + + // Define expected metrics + // See ts/metrics/system_metrics.py, ts/configs/metrics.yaml + Map> expectedMetrics = new HashMap<>(); + expectedMetrics.put("GPUMemoryUtilization", Map.of(UNIT, "Percent", LEVEL, HOST)); + expectedMetrics.put("GPUMemoryUsed", Map.of(UNIT, "Megabytes", LEVEL, HOST)); + expectedMetrics.put("GPUUtilization", Map.of(UNIT, "Percent", LEVEL, HOST)); + expectedMetrics.put("CPUUtilization", Map.of(UNIT, "Percent", LEVEL, HOST)); + expectedMetrics.put("MemoryUsed", Map.of(UNIT, "Megabytes", LEVEL, HOST)); + expectedMetrics.put("MemoryAvailable", Map.of(UNIT, "Megabytes", LEVEL, HOST)); + expectedMetrics.put("MemoryUtilization", Map.of(UNIT, "Percent", LEVEL, HOST)); + expectedMetrics.put("DiskUsage", Map.of(UNIT, "Gigabytes", LEVEL, HOST)); + expectedMetrics.put("DiskUtilization", Map.of(UNIT, "Percent", LEVEL, HOST)); + expectedMetrics.put("DiskAvailable", Map.of(UNIT, "Gigabytes", LEVEL, HOST)); + MetricManager.scheduleMetrics(configManager); MetricManager metricManager = MetricManager.getInstance(); List metrics = metricManager.getMetrics(); @@ -1347,23 +1367,31 @@ public void testMetricManager() throws JsonParseException, InterruptedException // Wait till first value is read in int count = 0; while (metrics.isEmpty()) { - Thread.sleep(500); + Thread.sleep(1000); metrics = metricManager.getMetrics(); Assert.assertTrue(++count < 5); } + + // 7 system-level metrics + 3 gpu-specific metrics + Assert.assertEquals(metrics.size(), 7 + 3 * configManager.getNumberOfGpu()); + for (Metric metric : metrics) { - if (metric.getMetricName().equals("CPUUtilization")) { - Assert.assertEquals(metric.getUnit(), "Percent"); - } - if (metric.getMetricName().equals("MemoryUsed")) { - Assert.assertEquals(metric.getUnit(), "Megabytes"); + String metricName = metric.getMetricName(); + Assert.assertTrue(expectedMetrics.containsKey(metricName)); + + Map expectedValues = expectedMetrics.get(metricName); + Assert.assertEquals(expectedValues.get(UNIT), metric.getUnit()); + + List dimensions = metric.getDimensions(); + Map dimensionMap = new HashMap<>(); + for (Dimension dimension : dimensions) { + dimensionMap.put(dimension.getName(), dimension.getValue()); } - if (metric.getMetricName().equals("DiskUsed")) { - List dimensions = metric.getDimensions(); - for (Dimension dimension : dimensions) { - if (dimension.getName().equals("Level")) { - Assert.assertEquals(dimension.getValue(), "Host"); - } + + for (Map.Entry entry : 
expectedValues.entrySet()) { + if (!entry.getKey().equals(UNIT)) { + Assert.assertTrue(dimensionMap.containsKey(entry.getKey())); + Assert.assertEquals(entry.getValue(), dimensionMap.get(entry.getKey())); } } } diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/AcceleratorTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/AcceleratorTest.java new file mode 100644 index 0000000000..3dd2e07107 --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/AcceleratorTest.java @@ -0,0 +1,76 @@ +package org.pytorch.serve.device; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class AcceleratorTest { + + @Test + public void testAcceleratorConstructor() { + Accelerator accelerator = new Accelerator("TestGPU", AcceleratorVendor.NVIDIA, 0); + Assert.assertEquals(accelerator.getAcceleratorModel(), "TestGPU"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.NVIDIA); + Assert.assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + } + + @Test + public void testGettersAndSetters() { + Accelerator accelerator = new Accelerator("TestGPU", AcceleratorVendor.AMD, 1); + + accelerator.setMemoryAvailableMegaBytes(8192); + Assert.assertEquals(accelerator.getMemoryAvailableMegaBytes(), Integer.valueOf(8192)); + + accelerator.setUsagePercentage(75.5f); + Assert.assertEquals(accelerator.getUsagePercentage(), Float.valueOf(75.5f)); + + accelerator.setMemoryUtilizationPercentage(60.0f); + Assert.assertEquals(accelerator.getMemoryUtilizationPercentage(), Float.valueOf(60.0f)); + + accelerator.setMemoryUtilizationMegabytes(4096); + Assert.assertEquals(accelerator.getMemoryUtilizationMegabytes(), Integer.valueOf(4096)); + } + + @Test + public void testUtilizationToString() { + Accelerator accelerator = new Accelerator("TestGPU", AcceleratorVendor.NVIDIA, 2); + accelerator.setUsagePercentage(80.0f); + accelerator.setMemoryUtilizationPercentage(70.0f); + accelerator.setMemoryUtilizationMegabytes(5120); + + String expected = + "gpuId::2 utilization.gpu::80 % utilization.memory::70 % memory.used::5,120 MiB"; + Assert.assertEquals(accelerator.utilizationToString(), expected); + } + + @Test + public void testUpdateDynamicAttributes() { + Accelerator accelerator = new Accelerator("TestGPU", AcceleratorVendor.INTEL, 3); + accelerator.setUsagePercentage(42.42f); + accelerator.setMemoryUtilizationPercentage(1.0f); + accelerator.setMemoryUtilizationMegabytes(9999999); + Accelerator updated = new Accelerator("UpdatedGPU", AcceleratorVendor.INTEL, 3); + updated.setUsagePercentage(90.0f); + updated.setMemoryUtilizationPercentage(85.0f); + updated.setMemoryUtilizationMegabytes(6144); + + accelerator.updateDynamicAttributes(updated); + + Assert.assertEquals(accelerator.getUsagePercentage(), Float.valueOf(90.0f)); + Assert.assertEquals(accelerator.getMemoryUtilizationPercentage(), Float.valueOf(85.0f)); + Assert.assertEquals(accelerator.getMemoryUtilizationMegabytes(), Integer.valueOf(6144)); + + // Check that static attributes are not updated + Assert.assertEquals(accelerator.getAcceleratorModel(), "TestGPU"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.INTEL); + Assert.assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(3)); + } + + @Test + public void testAcceleratorVendorEnumValues() { + Assert.assertEquals(AcceleratorVendor.AMD.name(), "AMD"); + Assert.assertEquals(AcceleratorVendor.NVIDIA.name(), "NVIDIA"); + Assert.assertEquals(AcceleratorVendor.INTEL.name(), "INTEL"); + 
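The expected value in testUtilizationToString above ("memory.used::5,120 MiB") implies grouped integer formatting of the megabyte count; a format call along these lines would reproduce it in an English locale (an assumption about how such a string could be produced, not a quote from the implementation):

    String line =
            String.format(
                    "gpuId::%d utilization.gpu::%.0f %% utilization.memory::%.0f %% memory.used::%,d MiB",
                    2, 80.0f, 70.0f, 5120);
    // -> "gpuId::2 utilization.gpu::80 % utilization.memory::70 % memory.used::5,120 MiB"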
Assert.assertEquals(AcceleratorVendor.APPLE.name(), "APPLE"); + Assert.assertEquals(AcceleratorVendor.UNKNOWN.name(), "UNKNOWN"); + } +} diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/SystemInfoTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/SystemInfoTest.java new file mode 100644 index 0000000000..05521217f8 --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/SystemInfoTest.java @@ -0,0 +1,47 @@ +package org.pytorch.serve.device; + +import java.util.LinkedHashSet; +import org.pytorch.serve.device.interfaces.IAcceleratorUtility; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class SystemInfoTest { + + @Test + public void testParseVisibleDevicesEnv() { + LinkedHashSet result = IAcceleratorUtility.parseVisibleDevicesEnv("0,1,2"); + Assert.assertEquals(result.size(), 3); + Assert.assertTrue(result.contains(0)); + Assert.assertTrue(result.contains(1)); + Assert.assertTrue(result.contains(2)); + + result = IAcceleratorUtility.parseVisibleDevicesEnv("0, 1, 2"); + Assert.assertEquals(result.size(), 3); + Assert.assertTrue(result.contains(0)); + Assert.assertTrue(result.contains(1)); + Assert.assertTrue(result.contains(2)); + + result = IAcceleratorUtility.parseVisibleDevicesEnv("0,0,2"); + Assert.assertEquals(result.size(), 2); + Assert.assertTrue(result.contains(0)); + Assert.assertTrue(result.contains(2)); + + result = IAcceleratorUtility.parseVisibleDevicesEnv(""); + Assert.assertTrue(result.isEmpty()); + + result = IAcceleratorUtility.parseVisibleDevicesEnv(null); + Assert.assertTrue(result.isEmpty()); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testParseVisibleDevicesEnvInvalidInput() { + IAcceleratorUtility.parseVisibleDevicesEnv("0,1,a"); + } + + @Test + public void testBytesToMegabytes() { + Assert.assertEquals(IAcceleratorUtility.bytesToMegabytes(1048576L), Integer.valueOf(1)); + Assert.assertEquals(IAcceleratorUtility.bytesToMegabytes(2097152L), Integer.valueOf(2)); + Assert.assertEquals(IAcceleratorUtility.bytesToMegabytes(0L), Integer.valueOf(0)); + } +} diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java new file mode 100644 index 0000000000..e333f7ec83 --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/utils/AppleUtilTest.java @@ -0,0 +1,121 @@ +package org.pytorch.serve.device.utils; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertEqualsNoOrder; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class AppleUtilTest { + + private AppleUtil appleUtil; + private String jsonStringPath; + private JsonObject sampleOutputJson; + + @BeforeClass + public void setUp() { + appleUtil = new AppleUtil(); + jsonStringPath = "src/test/resources/metrics/sample_apple_smi.json"; + + try { + FileReader reader = new FileReader(jsonStringPath); + JsonElement jsonElement = JsonParser.parseReader(reader); + sampleOutputJson = 
jsonElement.getAsJsonObject(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + @Test + public void testGetGpuEnvVariableName() { + assertNull(appleUtil.getGpuEnvVariableName()); + } + + @Test + public void testGetUtilizationSmiCommand() { + String[] expectedCommand = { + "system_profiler", "-json", "-detailLevel", "mini", "SPDisplaysDataType" + }; + assertEqualsNoOrder(appleUtil.getUtilizationSmiCommand(), expectedCommand); + } + + @Test + public void testJsonObjectToAccelerator() { + JsonObject gpuObject = + sampleOutputJson.getAsJsonArray("SPDisplaysDataType").get(0).getAsJsonObject(); + Accelerator accelerator = appleUtil.jsonObjectToAccelerator(gpuObject); + + assertNotNull(accelerator); + assertEquals(accelerator.getAcceleratorModel(), "Apple M1"); + assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE); + assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + assertEquals(accelerator.getUsagePercentage(), Float.valueOf(0f)); + assertEquals(accelerator.getMemoryUtilizationPercentage(), Float.valueOf(0f)); + assertEquals(accelerator.getMemoryUtilizationMegabytes(), Integer.valueOf(0)); + } + + @Test + public void testExtractAcceleratorId() { + JsonObject gpuObject = + sampleOutputJson.getAsJsonArray("SPDisplaysDataType").get(0).getAsJsonObject(); + assertEquals(appleUtil.extractAcceleratorId(gpuObject), Integer.valueOf(0)); + } + + @Test + public void testExtractAccelerators() { + List accelerators = appleUtil.extractAccelerators(sampleOutputJson); + + assertEquals(accelerators.size(), 7); + assertEquals(accelerators.get(0).get("sppci_model").getAsString(), "Apple M1"); + } + + @Test + public void testSmiOutputToUpdatedAccelerators() { + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(); + parsedGpuIds.add(0); + + ArrayList updatedAccelerators = + appleUtil.smiOutputToUpdatedAccelerators(sampleOutputJson.toString(), parsedGpuIds); + + assertEquals(updatedAccelerators.size(), 7); + Accelerator accelerator = updatedAccelerators.get(0); + assertEquals(accelerator.getAcceleratorModel(), "Apple M1"); + assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE); + assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + } + + @Test + public void testGetAvailableAccelerators() { + LinkedHashSet availableAcceleratorIds = new LinkedHashSet<>(); + availableAcceleratorIds.add(0); + + // Mock the callSMI method to return our sample output + AppleUtil spyAppleUtil = + new AppleUtil() { + @Override + public String[] getUtilizationSmiCommand() { + return new String[] {"echo", sampleOutputJson.toString()}; + } + }; + + ArrayList availableAccelerators = + spyAppleUtil.getAvailableAccelerators(availableAcceleratorIds); + + assertEquals(availableAccelerators.size(), 7); + Accelerator accelerator = availableAccelerators.get(0); + assertEquals(accelerator.getAcceleratorModel(), "Apple M1"); + assertEquals(accelerator.getVendor(), AcceleratorVendor.APPLE); + assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + } +} diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/utils/CudaUtilTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/utils/CudaUtilTest.java new file mode 100644 index 0000000000..76c2012658 --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/utils/CudaUtilTest.java @@ -0,0 +1,132 @@ +package org.pytorch.serve.device.utils; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import org.pytorch.serve.device.Accelerator; +import 
org.pytorch.serve.device.AcceleratorVendor; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class CudaUtilTest { + + private CudaUtil cudaUtil = new CudaUtil(); + + @Test + public void testGetGpuEnvVariableName() { + Assert.assertEquals(cudaUtil.getGpuEnvVariableName(), "CUDA_VISIBLE_DEVICES"); + } + + @Test + public void testGetUtilizationSmiCommand() { + String[] expectedCommand = { + "nvidia-smi", + "--query-gpu=index,gpu_name,utilization.gpu,utilization.memory,memory.used", + "--format=csv,nounits" + }; + Assert.assertEquals(cudaUtil.getUtilizationSmiCommand(), expectedCommand); + } + + @Test + public void testSmiOutputToUpdatedAccelerators() { + String smiOutput = + "index,gpu_name,utilization.gpu,utilization.memory,memory.used\n" + + "0,NVIDIA GeForce RTX 3080,50,60,8000\n" + + "1,NVIDIA Tesla V100,75,80,16000\n"; + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(java.util.Arrays.asList(0, 1)); + + ArrayList accelerators = + cudaUtil.smiOutputToUpdatedAccelerators(smiOutput, parsedGpuIds); + + Assert.assertEquals(accelerators.size(), 2); + + Accelerator accelerator1 = accelerators.get(0); + Assert.assertEquals((int) accelerator1.getAcceleratorId(), 0); + Assert.assertEquals(accelerator1.getAcceleratorModel(), "NVIDIA GeForce RTX 3080"); + Assert.assertEquals((float) accelerator1.getUsagePercentage(), 50f); + Assert.assertEquals((float) accelerator1.getMemoryUtilizationPercentage(), 60f); + Assert.assertEquals((int) accelerator1.getMemoryUtilizationMegabytes(), 8000); + + Accelerator accelerator2 = accelerators.get(1); + Assert.assertEquals((int) accelerator2.getAcceleratorId(), 1); + Assert.assertEquals(accelerator2.getAcceleratorModel(), "NVIDIA Tesla V100"); + Assert.assertEquals((float) accelerator2.getUsagePercentage(), 75f); + Assert.assertEquals((float) accelerator2.getMemoryUtilizationPercentage(), 80f); + Assert.assertEquals((int) accelerator2.getMemoryUtilizationMegabytes(), 16000); + } + + @Test + public void testParseAccelerator() { + String[] parts = {"0", "NVIDIA GeForce RTX 3080"}; + Accelerator accelerator = cudaUtil.parseAccelerator(parts); + + Assert.assertEquals((int) accelerator.getAcceleratorId(), 0); + Assert.assertEquals(accelerator.getAcceleratorModel(), "NVIDIA GeForce RTX 3080"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.NVIDIA); + } + + @Test + public void testParseAcceleratorWithDifferentId() { + String[] parts = {"2", "NVIDIA Tesla T4"}; + Accelerator accelerator = cudaUtil.parseAccelerator(parts); + + Assert.assertEquals((int) accelerator.getAcceleratorId(), 2); + Assert.assertEquals(accelerator.getAcceleratorModel(), "NVIDIA Tesla T4"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.NVIDIA); + } + + @Test(expectedExceptions = NumberFormatException.class) + public void testParseAcceleratorWithInvalidId() { + String[] parts = {"invalid", "NVIDIA GeForce GTX 1080"}; + cudaUtil.parseAccelerator(parts); + } + + @Test + public void testParseUpdatedAccelerator() { + String[] parts = {"1", "NVIDIA Tesla V100", "75", "80", "16000"}; + Accelerator accelerator = cudaUtil.parseUpdatedAccelerator(parts); + + Assert.assertEquals((int) accelerator.getAcceleratorId(), 1); + Assert.assertEquals(accelerator.getAcceleratorModel(), "NVIDIA Tesla V100"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.NVIDIA); + Assert.assertEquals((float) accelerator.getUsagePercentage(), 75f); + Assert.assertEquals((float) accelerator.getMemoryUtilizationPercentage(), 80f); + Assert.assertEquals((int) 
accelerator.getMemoryUtilizationMegabytes(), 16000); + } + + @Test + public void testParseUpdatedAcceleratorWithDifferentValues() { + String[] parts = {"3", "NVIDIA A100", "30.5", "45.7", "40960"}; + Accelerator accelerator = cudaUtil.parseUpdatedAccelerator(parts); + + Assert.assertEquals((int) accelerator.getAcceleratorId(), 3); + Assert.assertEquals(accelerator.getAcceleratorModel(), "NVIDIA A100"); + Assert.assertEquals(accelerator.getVendor(), AcceleratorVendor.NVIDIA); + Assert.assertEquals((float) accelerator.getUsagePercentage(), 30.5f); + Assert.assertEquals((float) accelerator.getMemoryUtilizationPercentage(), 45.7f); + Assert.assertEquals((int) accelerator.getMemoryUtilizationMegabytes(), 40960); + } + + @Test(expectedExceptions = NumberFormatException.class) + public void testParseUpdatedAcceleratorWithInvalidUsagePercentage() { + String[] parts = {"0", "NVIDIA GeForce RTX 2080", "invalid", "80", "8000"}; + cudaUtil.parseUpdatedAccelerator(parts); + } + + @Test(expectedExceptions = NumberFormatException.class) + public void testParseUpdatedAcceleratorWithInvalidMemoryUtilization() { + String[] parts = {"0", "NVIDIA GeForce RTX 2080", "75", "invalid", "8000"}; + cudaUtil.parseUpdatedAccelerator(parts); + } + + @Test(expectedExceptions = NumberFormatException.class) + public void testParseUpdatedAcceleratorWithInvalidMemoryUsage() { + String[] parts = {"0", "NVIDIA GeForce RTX 2080", "75", "80", "invalid"}; + cudaUtil.parseUpdatedAccelerator(parts); + } + + @Test(expectedExceptions = ArrayIndexOutOfBoundsException.class) + public void testParseUpdatedAcceleratorWithInsufficientData() { + String[] parts = {"0", "NVIDIA GeForce RTX 2080", "75", "80"}; + cudaUtil.parseUpdatedAccelerator(parts); + } +} diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/utils/ROCmUtilTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/utils/ROCmUtilTest.java new file mode 100644 index 0000000000..26e48264f5 --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/utils/ROCmUtilTest.java @@ -0,0 +1,143 @@ +package org.pytorch.serve.device.utils; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertEqualsNoOrder; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import com.google.gson.JsonParser; +import java.io.FileReader; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import org.pytorch.serve.device.Accelerator; +import org.pytorch.serve.device.AcceleratorVendor; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +public class ROCmUtilTest { + + private ROCmUtil rocmUtil; + private String sampleDiscoveryJsonPath; + private String sampleMetricsJsonPath; + private String sampleUpdatedMetricsJsonPath; + private JsonObject sampleDiscoveryJsonObject; + private JsonObject sampleMetricsJsonObject; + private JsonObject sampleUpdatedMetricsJsonObject; + + @BeforeClass + public void setUp() { + rocmUtil = new ROCmUtil(); + sampleDiscoveryJsonPath = "src/test/resources/metrics/sample_amd_discovery.json"; + sampleMetricsJsonPath = "src/test/resources/metrics/sample_amd_metrics.json"; + sampleUpdatedMetricsJsonPath = "src/test/resources/metrics/sample_amd_updated_metrics.json"; + + try { + FileReader reader = new FileReader(sampleDiscoveryJsonPath); + JsonElement jsonElement = JsonParser.parseReader(reader); + sampleDiscoveryJsonObject = jsonElement.getAsJsonObject(); + + reader = new FileReader(sampleMetricsJsonPath); + jsonElement = 
JsonParser.parseReader(reader); + sampleMetricsJsonObject = jsonElement.getAsJsonObject(); + + reader = new FileReader(sampleUpdatedMetricsJsonPath); + jsonElement = JsonParser.parseReader(reader); + sampleUpdatedMetricsJsonObject = jsonElement.getAsJsonObject(); + + } catch (Exception e) { + e.printStackTrace(); + } + } + + @Test + public void testGetGpuEnvVariableName() { + assertEquals(rocmUtil.getGpuEnvVariableName(), "HIP_VISIBLE_DEVICES"); + } + + @Test + public void testGetUtilizationSmiCommand() { + String[] expectedCommand = { + "rocm-smi", + "--showid", + "--showproductname", + "--showuse", + "--showmemuse", + "--showmeminfo", + "vram", + "-P", + "--json" + }; + assertEqualsNoOrder(rocmUtil.getUtilizationSmiCommand(), expectedCommand); + } + + @Test + public void testExtractAccelerators() { + List accelerators = rocmUtil.extractAccelerators(sampleMetricsJsonObject); + assertEquals(accelerators.size(), 2); + assertEquals(accelerators.get(0).get("cardId").getAsString(), "card0"); + assertEquals(accelerators.get(1).get("cardId").getAsString(), "card1"); + } + + @Test + public void testExtractAcceleratorId() { + JsonObject card0Object = rocmUtil.extractAccelerators(sampleMetricsJsonObject).get(0); + JsonObject card1Object = rocmUtil.extractAccelerators(sampleMetricsJsonObject).get(1); + + Integer acceleratorId0 = rocmUtil.extractAcceleratorId(card0Object); + Integer acceleratorId1 = rocmUtil.extractAcceleratorId(card1Object); + + assertEquals(acceleratorId0, Integer.valueOf(0)); + assertEquals(acceleratorId1, Integer.valueOf(1)); + } + + @Test + public void testJsonMetricsObjectToAccelerator() { + JsonObject card0Object = rocmUtil.extractAccelerators(sampleMetricsJsonObject).get(0); + Accelerator accelerator = rocmUtil.jsonObjectToAccelerator(card0Object); + + assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + assertEquals(accelerator.getAcceleratorModel(), "AMD INSTINCT MI250 (MCM) OAM AC MBA"); + assertEquals(accelerator.getVendor(), AcceleratorVendor.AMD); + assertEquals((float) accelerator.getUsagePercentage(), 50.0f); + assertEquals((float) accelerator.getMemoryUtilizationPercentage(), 75.0f); + assertEquals(accelerator.getMemoryAvailableMegaBytes(), Integer.valueOf(65520)); + assertEquals(accelerator.getMemoryUtilizationMegabytes(), Integer.valueOf(49140)); + } + + @Test + public void testJsonDiscoveryObjectToAccelerator() { + JsonObject card0Object = rocmUtil.extractAccelerators(sampleDiscoveryJsonObject).get(0); + Accelerator accelerator = rocmUtil.jsonObjectToAccelerator(card0Object); + + assertEquals(accelerator.getAcceleratorId(), Integer.valueOf(0)); + assertEquals(accelerator.getAcceleratorModel(), "AMD INSTINCT MI250 (MCM) OAM AC MBA"); + assertEquals(accelerator.getVendor(), AcceleratorVendor.AMD); + } + + @Test + public void testSmiOutputToUpdatedAccelerators() { + String smiOutput = sampleMetricsJsonObject.toString(); + String updatedMetrics = sampleUpdatedMetricsJsonObject.toString(); + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(); + parsedGpuIds.add(0); + parsedGpuIds.add(1); + + ArrayList accelerators = + rocmUtil.smiOutputToUpdatedAccelerators(smiOutput, parsedGpuIds); + accelerators = rocmUtil.smiOutputToUpdatedAccelerators(updatedMetrics, parsedGpuIds); + + assertEquals(accelerators.size(), 2); + + System.out.println(accelerators.toString()); + + Accelerator accelerator0 = accelerators.get(0); + assertEquals(accelerator0.getAcceleratorId(), Integer.valueOf(0)); + assertEquals(accelerator0.getAcceleratorModel(), "AMD INSTINCT MI250 (MCM) 
OAM AC MBA"); + assertEquals(accelerator0.getVendor(), AcceleratorVendor.AMD); + assertEquals((float) accelerator0.getUsagePercentage(), 25.0f); + assertEquals((float) accelerator0.getMemoryUtilizationPercentage(), 25.0f); + assertEquals(accelerator0.getMemoryAvailableMegaBytes(), Integer.valueOf(65520)); + assertEquals(accelerator0.getMemoryUtilizationMegabytes(), Integer.valueOf(49140)); + } +} diff --git a/frontend/server/src/test/java/org/pytorch/serve/device/utils/XpuUtilTest.java b/frontend/server/src/test/java/org/pytorch/serve/device/utils/XpuUtilTest.java new file mode 100644 index 0000000000..5656a1660c --- /dev/null +++ b/frontend/server/src/test/java/org/pytorch/serve/device/utils/XpuUtilTest.java @@ -0,0 +1,138 @@ +package org.pytorch.serve.device.utils; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.testng.Assert.*; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import org.pytorch.serve.device.Accelerator; +import org.testng.annotations.*; + +public class XpuUtilTest { + + private XpuUtil xpuUtil; + + @BeforeMethod + public void setUp() { + xpuUtil = new XpuUtil(); + } + + @Test + public void testGetGpuEnvVariableName() { + assertEquals( + xpuUtil.getGpuEnvVariableName(), + "XPU_VISIBLE_DEVICES", + "GPU environment variable name should be XPU_VISIBLE_DEVICES"); + } + + @Test + public void testGetUtilizationSmiCommand() { + String[] expectedCommand = {"xpu-smi", "dump", "-d -1", "-n 1", "-m", "0,5"}; + assertArrayEquals( + xpuUtil.getUtilizationSmiCommand(), + expectedCommand, + "Utilization SMI command should match expected"); + } + + @Test + public void testSmiOutputToUpdatedAccelerators() { + String smiOutput = + "Timestamp,DeviceId,GPU Utilization (%),GPU Memory Utilization (%)\n" + + "06:14:46.000,0,50.00,75.50\n" + + "06:14:47.000,1,25.00,60.25"; + + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(); + parsedGpuIds.add(0); + parsedGpuIds.add(1); + + ArrayList updatedAccelerators = + xpuUtil.smiOutputToUpdatedAccelerators(smiOutput, parsedGpuIds); + + assertEquals(updatedAccelerators.size(), 2, "Should return 2 updated accelerators"); + assertEquals( + (int) updatedAccelerators.get(0).getAcceleratorId(), + 0, + "First accelerator should have ID 0"); + assertEquals( + (int) updatedAccelerators.get(1).getAcceleratorId(), + 1, + "Second accelerator should have ID 1"); + assertEquals( + (float) updatedAccelerators.get(0).getUsagePercentage(), + 50.00f, + 0.01, + "GPU utilization should match for first accelerator"); + assertEquals( + (float) updatedAccelerators.get(0).getMemoryUtilizationPercentage(), + 75.50f, + 0.01, + "Memory utilization should match for first accelerator"); + assertEquals( + (float) updatedAccelerators.get(1).getUsagePercentage(), + 25.00f, + 0.01, + "GPU utilization should match for second accelerator"); + assertEquals( + (float) updatedAccelerators.get(1).getMemoryUtilizationPercentage(), + 60.25f, + 0.01, + "Memory utilization should match for second accelerator"); + } + + @Test + public void testSmiOutputToUpdatedAcceleratorsWithFilteredIds() { + String smiOutput = + "Timestamp,DeviceId,GPU Utilization (%),GPU Memory Utilization (%)\n" + + "06:14:46.000,0,50.00,75.50\n" + + "06:14:47.000,1,25.00,60.25\n" + + "06:14:48.000,2,30.00,70.00"; + + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(); + parsedGpuIds.add(0); + parsedGpuIds.add(2); + + ArrayList updatedAccelerators = + xpuUtil.smiOutputToUpdatedAccelerators(smiOutput, parsedGpuIds); + + assertEquals(updatedAccelerators.size(), 2, 
"Should return 2 updated accelerators"); + assertEquals( + (int) updatedAccelerators.get(0).getAcceleratorId(), + 0, + "First accelerator should have ID 0"); + assertEquals( + (int) updatedAccelerators.get(1).getAcceleratorId(), + 2, + "Second accelerator should have ID 2"); + assertEquals( + (float) updatedAccelerators.get(0).getUsagePercentage(), + 50.00f, + 0.01, + "GPU utilization should match for first accelerator"); + assertEquals( + (float) updatedAccelerators.get(0).getMemoryUtilizationPercentage(), + 75.50f, + 0.01, + "Memory utilization should match for first accelerator"); + assertEquals( + (float) updatedAccelerators.get(1).getUsagePercentage(), + 30.00f, + 0.01, + "GPU utilization should match for third accelerator"); + assertEquals( + (float) updatedAccelerators.get(1).getMemoryUtilizationPercentage(), + 70.00f, + 0.01, + "Memory utilization should match for third accelerator"); + } + + @Test + public void testSmiOutputToUpdatedAcceleratorsWithInvalidInput() { + String invalidSmiOutput = "Invalid SMI output"; + LinkedHashSet parsedGpuIds = new LinkedHashSet<>(); + parsedGpuIds.add(0); + + ArrayList accelerators = + xpuUtil.smiOutputToUpdatedAccelerators(invalidSmiOutput, parsedGpuIds); + assertEquals(accelerators.size(), 0); + } +} diff --git a/frontend/server/src/test/resources/metrics/sample_amd_discovery.json b/frontend/server/src/test/resources/metrics/sample_amd_discovery.json new file mode 100644 index 0000000000..32f49428be --- /dev/null +++ b/frontend/server/src/test/resources/metrics/sample_amd_discovery.json @@ -0,0 +1,26 @@ +{ + "card0": { + "Average Graphics Package Power (W)": "92.0", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. [AMD/ATI]", + "Card SKU": "D65210V", + "Subsystem ID": "0x0b0c", + "Device Rev": "0x01", + "Node ID": "4", + "GUID": "11743", + "GFX Version": "gfx9010" + }, + "card1": { + "Average Graphics Package Power (W)": "N/A (Secondary die)", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. [AMD/ATI]", + "Card SKU": "D65210V", + "Subsystem ID": "0x0b0c", + "Device Rev": "0x01", + "Node ID": "5", + "GUID": "61477", + "GFX Version": "gfx9010" + } +} diff --git a/frontend/server/src/test/resources/metrics/sample_amd_metrics.json b/frontend/server/src/test/resources/metrics/sample_amd_metrics.json new file mode 100644 index 0000000000..311091bb2a --- /dev/null +++ b/frontend/server/src/test/resources/metrics/sample_amd_metrics.json @@ -0,0 +1,46 @@ +{ + "card0": { + "Device Name": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Device ID": "0x740c", + "Device Rev": "0x01", + "Subsystem ID": "0x0b0c", + "GUID": "11743", + "Average Graphics Package Power (W)": "92.0", + "GPU use (%)": "50", + "GFX Activity": "62827955", + "GPU Memory Allocated (VRAM%)": "75", + "GPU Memory Read/Write Activity (%)": "0", + "Memory Activity": "17044038", + "Avg. Memory Bandwidth": "0", + "VRAM Total Memory (B)": "68702699520", + "VRAM Total Used Memory (B)": "51527024640", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. 
[AMD/ATI]", + "Card SKU": "D65210V", + "Node ID": "4", + "GFX Version": "gfx9010" + }, + "card1": { + "Device Name": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Device ID": "0x740c", + "Device Rev": "0x01", + "Subsystem ID": "0x0b0c", + "GUID": "61477", + "Average Graphics Package Power (W)": "N/A (Secondary die)", + "GPU use (%)": "50", + "GFX Activity": "46030661", + "GPU Memory Allocated (VRAM%)": "50", + "GPU Memory Read/Write Activity (%)": "0", + "Memory Activity": "10645369", + "Avg. Memory Bandwidth": "0", + "VRAM Total Memory (B)": "68702699520", + "VRAM Total Used Memory (B)": "51527024640", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. [AMD/ATI]", + "Card SKU": "D65210V", + "Node ID": "5", + "GFX Version": "gfx9010" + } +} diff --git a/frontend/server/src/test/resources/metrics/sample_amd_updated_metrics.json b/frontend/server/src/test/resources/metrics/sample_amd_updated_metrics.json new file mode 100644 index 0000000000..7b938c90b4 --- /dev/null +++ b/frontend/server/src/test/resources/metrics/sample_amd_updated_metrics.json @@ -0,0 +1,46 @@ +{ + "card0": { + "Device Name": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Device ID": "0x740c", + "Device Rev": "0x01", + "Subsystem ID": "0x0b0c", + "GUID": "11743", + "Average Graphics Package Power (W)": "92.0", + "GPU use (%)": "25", + "GFX Activity": "62827955", + "GPU Memory Allocated (VRAM%)": "25", + "GPU Memory Read/Write Activity (%)": "0", + "Memory Activity": "17044038", + "Avg. Memory Bandwidth": "0", + "VRAM Total Memory (B)": "68702699520", + "VRAM Total Used Memory (B)": "51527024640", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. [AMD/ATI]", + "Card SKU": "D65210V", + "Node ID": "4", + "GFX Version": "gfx9010" + }, + "card1": { + "Device Name": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Device ID": "0x740c", + "Device Rev": "0x01", + "Subsystem ID": "0x0b0c", + "GUID": "61477", + "Average Graphics Package Power (W)": "N/A (Secondary die)", + "GPU use (%)": "50", + "GFX Activity": "46030661", + "GPU Memory Allocated (VRAM%)": "50", + "GPU Memory Read/Write Activity (%)": "0", + "Memory Activity": "10645369", + "Avg. Memory Bandwidth": "0", + "VRAM Total Memory (B)": "68702699520", + "VRAM Total Used Memory (B)": "51527024640", + "Card Series": "AMD INSTINCT MI250 (MCM) OAM AC MBA", + "Card Model": "0x740c", + "Card Vendor": "Advanced Micro Devices, Inc. 
[AMD/ATI]", + "Card SKU": "D65210V", + "Node ID": "5", + "GFX Version": "gfx9010" + } +} diff --git a/frontend/server/src/test/resources/metrics/sample_apple_smi.json b/frontend/server/src/test/resources/metrics/sample_apple_smi.json new file mode 100644 index 0000000000..036f1549eb --- /dev/null +++ b/frontend/server/src/test/resources/metrics/sample_apple_smi.json @@ -0,0 +1,33 @@ +{ + "SPDisplaysDataType": [ + { + "_name": "kHW_AppleM1Item", + "spdisplays_metalfamily": "spdisplays_mtlgpufamilyapple7", + "spdisplays_ndrvs": [ + { + "_name": "Color LCD", + "_spdisplays_display-product-id": "a045", + "_spdisplays_display-serial-number": "fd626d62", + "_spdisplays_display-vendor-id": "610", + "_spdisplays_display-week": "0", + "_spdisplays_display-year": "0", + "_spdisplays_displayID": "1", + "_spdisplays_pixels": "2880 x 1800", + "_spdisplays_resolution": "1440 x 900 @ 60.00Hz", + "spdisplays_ambient_brightness": "spdisplays_no", + "spdisplays_connection_type": "spdisplays_internal", + "spdisplays_main": "spdisplays_yes", + "spdisplays_mirror": "spdisplays_off", + "spdisplays_online": "spdisplays_yes", + "spdisplays_pixelresolution": "2880 x 1800", + "spdisplays_resolution": "1440 x 900 @ 60.00Hz" + } + ], + "spdisplays_vendor": "sppci_vendor_Apple", + "sppci_bus": "spdisplays_builtin", + "sppci_cores": "7", + "sppci_device_type": "spdisplays_gpu", + "sppci_model": "Apple M1" + } + ] +} diff --git a/frontend/server/testng.xml b/frontend/server/testng.xml index ee898ca7f9..8a5335ca91 100644 --- a/frontend/server/testng.xml +++ b/frontend/server/testng.xml @@ -16,5 +16,9 @@ - + + + + + diff --git a/kubernetes/kserve/tests/scripts/test_mnist.sh b/kubernetes/kserve/tests/scripts/test_mnist.sh index 5c3532e1e5..7771d2cea9 100755 --- a/kubernetes/kserve/tests/scripts/test_mnist.sh +++ b/kubernetes/kserve/tests/scripts/test_mnist.sh @@ -3,14 +3,13 @@ set -o errexit -o nounset -o pipefail device=$1 +TEST_GPU="false" if [ "$device" = "gpu" ]; then TEST_GPU="true" -else - TEST_GPU="false" fi -function validate_gpu_memory_usage() { +function validate_gpu_memory_usage_nvidia() { echo "Validating GPU memory usage..." memory_usage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits) @@ -32,6 +31,52 @@ function validate_gpu_memory_usage() { fi } +function validate_gpu_memory_usage_amd() { + # Capture the output of the command into an array, line by line + mapfile -t memory_usage < <(amd-smi metric --mem-usage --csv) + memory_above_zero=false + + for row in "${memory_usage[@]}"; do + # Read each column in the row separated by commas + IFS=',' read -r -a columns <<< "$row" + if [ "${columns[0]}" == "gpu" ]; then + continue + fi + + if [ "${columns[2]}" -gt 0 ]; then + memory_above_zero=true + break + fi + done + + if [ "$memory_above_zero" = true ]; then + echo "GPU memory usage is greater than 0, proceeding with the tests." + else + echo "✘ GPU memory usage is 0, indicating no GPU activity. Test failed." + delete_minikube_cluster + exit 1 + fi +} + +function validate_gpu_memory_usage() { + if [ "$GPU_TYPE" = "nvidia-smi" ]; then + validate_gpu_memory_usage_nvidia + elif [ "$GPU_TYPE" = "amd-smi" ]; then + validate_gpu_memory_usage_amd + fi +} + +function detect_gpu_smi() { + for cmd in nvidia-smi amd-smi system_profiler xpu-smi; do + if command -v "$cmd" && "$cmd" > /dev/null 2>&1; then + echo "$cmd found and able to communicate with GPU(s)." + GPU_TYPE=$cmd + return + fi + done + echo "Cannot communicate with GPU(s)." 
+} + function start_minikube_cluster() { echo "Removing any previous Kubernetes cluster" minikube delete @@ -204,6 +249,7 @@ install_kserve echo "MNIST KServe V2 test begin" if [ "$TEST_GPU" = "true" ]; then deploy_cluster "kubernetes/kserve/tests/configs/mnist_v2_gpu.yaml" "torchserve-mnist-v2-predictor" + detect_gpu_smi validate_gpu_memory_usage else deploy_cluster "kubernetes/kserve/tests/configs/mnist_v2_cpu.yaml" "torchserve-mnist-v2-predictor" diff --git a/requirements/common_gpu.txt b/requirements/common_gpu.txt deleted file mode 100644 index 1e893cc7c1..0000000000 --- a/requirements/common_gpu.txt +++ /dev/null @@ -1,2 +0,0 @@ -nvgpu; sys_platform != 'win32' -nvgpu==0.10.0; sys_platform == 'win32' diff --git a/requirements/torch_rocm60.txt b/requirements/torch_rocm60.txt new file mode 100644 index 0000000000..f07063f625 --- /dev/null +++ b/requirements/torch_rocm60.txt @@ -0,0 +1,5 @@ +--find-links https://download.pytorch.org/whl/torch_stable.html +-r torch_common.txt +torch==2.3.1+rocm6.0; sys_platform == 'linux' +torchvision==0.18.1+rocm6.0; sys_platform == 'linux' +torchaudio==2.3.1+rocm6.0; sys_platform == 'linux' diff --git a/requirements/torch_rocm61.txt b/requirements/torch_rocm61.txt new file mode 100644 index 0000000000..0030b05f7f --- /dev/null +++ b/requirements/torch_rocm61.txt @@ -0,0 +1,4 @@ +--index-url https://download.pytorch.org/whl/rocm6.1 +torch==2.4.1+rocm6.1; sys_platform == 'linux' +torchvision==0.19.1+rocm6.1; sys_platform == 'linux' +torchaudio==2.4.1+rocm6.1; sys_platform == 'linux' diff --git a/requirements/torch_rocm62.txt b/requirements/torch_rocm62.txt new file mode 100644 index 0000000000..291a07b410 --- /dev/null +++ b/requirements/torch_rocm62.txt @@ -0,0 +1,4 @@ +--index-url https://download.pytorch.org/whl/rocm6.2 +torch==2.5.1+rocm6.2; sys_platform == 'linux' +torchvision==0.20.1+rocm6.2; sys_platform == 'linux' +torchaudio==2.5.1+rocm6.2; sys_platform == 'linux' diff --git a/ts/metrics/metric_collector.py b/ts/metrics/metric_collector.py index 9e1f9d698c..8c01e5f1bc 100644 --- a/ts/metrics/metric_collector.py +++ b/ts/metrics/metric_collector.py @@ -1,5 +1,3 @@ - - """ Single start point for system metrics and process metrics script @@ -11,19 +9,14 @@ from ts.metrics import system_metrics from ts.metrics.process_memory_metric import check_process_mem_usage -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - "--gpu", - action="store", - help="number of GPU", - type=int - ) + parser.add_argument("--gpu", action="store", help="number of GPUs", type=int) arguments = parser.parse_args() logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) - system_metrics.collect_all(sys.modules['ts.metrics.system_metrics'], arguments.gpu) + system_metrics.collect_all(sys.modules["ts.metrics.system_metrics"], arguments.gpu) check_process_mem_usage(sys.stdin) diff --git a/ts/metrics/system_metrics.py b/ts/metrics/system_metrics.py index e0a21f1c4f..5e69377f5a 100644 --- a/ts/metrics/system_metrics.py +++ b/ts/metrics/system_metrics.py @@ -1,11 +1,13 @@ """ Module to collect system metrics for front-end """ + import logging import types from builtins import str import psutil +import torch from ts.metrics.dimension import Dimension from ts.metrics.metric import Metric @@ -49,74 +51,78 @@ def disk_available(): system_metrics.append(Metric("DiskAvailable", data, "GB", dimension)) -def gpu_utilization(num_of_gpu): +def collect_gpu_metrics(num_of_gpus): """ - Collect gpu metrics. 
- - :param num_of_gpu: + Collect GPU metrics. Supports NVIDIA and AMD GPUs. + :param num_of_gpus: Total number of available GPUs. :return: """ - if num_of_gpu <= 0: + if num_of_gpus <= 0: return + for gpu_index in range(num_of_gpus): + if torch.version.cuda: + free, total = torch.cuda.mem_get_info(gpu_index) + mem_used = (total - free) // 1024**2 + gpu_mem_utilization = torch.cuda.memory_usage(gpu_index) + gpu_utilization = torch.cuda.utilization(gpu_index) + elif torch.version.hip: + # There is currently a bug in + # https://github.com/pytorch/pytorch/blob/838958de94ed3b9021ddb395fe3e7ed22a60b06c/torch/cuda/__init__.py#L1171 + # which does not capture the rate/percentage correctly. + # Otherwise same methods could be used. + # https://rocm.docs.amd.com/projects/amdsmi/en/latest/how-to/using-amdsmi-for-python.html#amdsmi-get-gpu-activity + import amdsmi + + try: + amdsmi.amdsmi_init() + + handle = amdsmi.amdsmi_get_processor_handles()[gpu_index] + mem_used = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"] + engine_usage = amdsmi.amdsmi_get_gpu_activity(handle) + gpu_utilization = engine_usage["gfx_activity"] + gpu_mem_utilization = engine_usage["umc_activity"] + except amdsmi.AmdSmiException as e: + logging.error("Could not initialize AMD-SMI library.") + finally: + try: + amdsmi.amdsmi_shut_down() + except amdsmi.AmdSmiException as e: + logging.error("Could not shut down AMD-SMI library.") - # pylint: disable=wrong-import-position - # pylint: disable=import-outside-toplevel - import nvgpu - import pynvml - from nvgpu import list_gpus - - # pylint: enable=wrong-import-position - # pylint: enable=import-outside-toplevel - - info = nvgpu.gpu_info() - for value in info: dimension_gpu = [ Dimension("Level", "Host"), - Dimension("device_id", value["index"]), + Dimension("device_id", gpu_index), ] system_metrics.append( Metric( "GPUMemoryUtilization", - value["mem_used_percent"], + gpu_mem_utilization, "percent", dimension_gpu, ) ) + system_metrics.append(Metric("GPUMemoryUsed", mem_used, "MB", dimension_gpu)) system_metrics.append( - Metric("GPUMemoryUsed", value["mem_used"], "MB", dimension_gpu) + Metric("GPUUtilization", gpu_utilization, "percent", dimension_gpu) ) - try: - statuses = list_gpus.device_statuses() - except pynvml.nvml.NVMLError_NotSupported: - logging.error("gpu device monitoring not supported") - statuses = [] - for idx, status in enumerate(statuses): - dimension_gpu = [Dimension("Level", "Host"), Dimension("device_id", idx)] - system_metrics.append( - Metric("GPUUtilization", status["utilization"], "percent", dimension_gpu) - ) - - -def collect_all(mod, num_of_gpu): +def collect_all(mod, num_of_gpus): """ Collect all system metrics. :param mod: - :param num_of_gpu: + :param num_of_gpus: Total number of available GPUs. 
:return: """ - members = dir(mod) for i in members: value = getattr(mod, i) if isinstance(value, types.FunctionType) and value.__name__ not in ( "collect_all", - "log_msg", ): - if value.__name__ == "gpu_utilization": - value(num_of_gpu) + if value.__name__ == "collect_gpu_metrics": + value(num_of_gpus) else: value() diff --git a/ts/torch_handler/base_handler.py b/ts/torch_handler/base_handler.py index fa4be5841c..1dad241922 100644 --- a/ts/torch_handler/base_handler.py +++ b/ts/torch_handler/base_handler.py @@ -41,7 +41,7 @@ if packaging.version.parse(torch.__version__) >= packaging.version.parse("2.0.0a"): PT2_AVAILABLE = True - if torch.cuda.is_available(): + if torch.cuda.is_available() and torch.version.cuda: # If Ampere enable tensor cores which will give better performance # Ideally get yourself an A10G or A100 for optimal performance if torch.cuda.get_device_capability() >= (8, 0): @@ -227,7 +227,7 @@ def initialize(self, context): if "compile" in pt2_value: compile_options = pt2_value["compile"] - if compile_options["enable"] == True: + if compile_options["enable"]: del compile_options["enable"] # if backend is not provided, compile will use its default, which is valid @@ -284,7 +284,7 @@ def initialize(self, context): self.model = self.model.to(memory_format=torch.channels_last) self.model = self.model.to(self.device) self.model = ipex.optimize(self.model) - logger.info(f"Compiled model with ipex") + logger.info("Compiled model with ipex") logger.debug("Model file %s loaded successfully", self.model_pt_path) @@ -364,7 +364,7 @@ def _use_torch_export_aot_compile(self): export_value = pt2_value.get("export", None) if isinstance(export_value, dict) and "aot_compile" in export_value: torch_export_aot_compile = ( - True if export_value["aot_compile"] == True else False + True if export_value["aot_compile"] else False ) return torch_export_aot_compile diff --git a/ts_scripts/install_dependencies.py b/ts_scripts/install_dependencies.py index 20bd76599a..4d464e03fe 100644 --- a/ts_scripts/install_dependencies.py +++ b/ts_scripts/install_dependencies.py @@ -94,7 +94,12 @@ def install_java(self): def install_nodejs(self): pass - def install_torch_packages(self, cuda_version): + def install_torch_packages(self, cuda_version=None, rocm_version=None): + if cuda_version and rocm_version: + raise ValueError( + "Cannot install both CUDA and ROCm dependencies, please pass only either one." + ) + if cuda_version: if platform.system() == "Darwin": print( @@ -110,6 +115,16 @@ def install_torch_packages(self, cuda_version): os.system( f"{sys.executable} -m pip install -U -r requirements/torch_{cuda_version}_{platform.system().lower()}.txt" ) + elif rocm_version: + if platform.system() in ["Darwin", "Windows"]: + print( + f"ROCm not supported on {platform.system()}. Refer https://pytorch.org/." 
+ ) + sys.exit(1) + else: + os.system( + f"{sys.executable} -m pip install -U -r requirements/torch_{rocm_version}.txt" + ) elif args.neuronx: torch_neuronx_requirements_file = os.path.join( "requirements", "torch_neuronx_linux.txt" @@ -127,7 +142,9 @@ def install_torch_packages(self, cuda_version): f"{sys.executable} -m pip install -U -r requirements/torch_{platform.system().lower()}.txt" ) - def install_python_packages(self, cuda_version, requirements_file_path, nightly): + def install_python_packages( + self, cuda_version, rocm_version, requirements_file_path, nightly + ): check = "where" if platform.system() == "Windows" else "which" if os.system(f"{check} conda") == 0: # conda install command should run before the pip install commands @@ -143,16 +160,13 @@ def install_python_packages(self, cuda_version, requirements_file_path, nightly) elif args.skip_torch_install: print("Skipping Torch installation") else: - self.install_torch_packages(cuda_version) + self.install_torch_packages( + cuda_version=cuda_version, rocm_version=rocm_version + ) # developer.txt also installs packages from common.txt os.system(f"{sys.executable} -m pip install -U -r {requirements_file_path}") - # Install dependencies for GPU - if not isinstance(cuda_version, type(None)): - gpu_requirements_file = os.path.join("requirements", "common_gpu.txt") - os.system(f"{sys.executable} -m pip install -U -r {gpu_requirements_file}") - # Install dependencies for Inferentia2 if args.neuronx: neuronx_requirements_file = os.path.join("requirements", "neuronx.txt") @@ -306,7 +320,7 @@ def install_neuronx_driver(self): pass -def install_dependencies(cuda_version=None, nightly=False): +def install_dependencies(cuda_version=None, rocm_version=None, nightly=False): os_map = {"Linux": Linux, "Windows": Windows, "Darwin": Darwin} system = os_map[platform.system()]() @@ -325,7 +339,12 @@ def install_dependencies(cuda_version=None, nightly=False): requirements_file = "common.txt" if args.environment == "prod" else "developer.txt" requirements_file_path = os.path.join("requirements", requirements_file) - system.install_python_packages(cuda_version, requirements_file_path, nightly) + system.install_python_packages( + cuda_version, + rocm_version, + requirements_file_path=requirements_file_path, + nightly=nightly, + ) if args.cpp: system.install_cpp_dependencies() @@ -363,6 +382,12 @@ def get_brew_version(): action="store_true", help="Install dependencies for inferentia2 support", ) + parser.add_argument( + "--rocm", + default=None, + choices=["rocm60", "rocm61", "rocm62"], + help="ROCm version for torch", + ) parser.add_argument( "--cpp", action="store_true", @@ -394,4 +419,6 @@ def get_brew_version(): ) args = parser.parse_args() - install_dependencies(cuda_version=args.cuda, nightly=args.nightly_torch) + install_dependencies( + cuda_version=args.cuda, rocm_version=args.rocm, nightly=args.nightly_torch + ) diff --git a/ts_scripts/install_utils b/ts_scripts/install_utils index 3dd01da6fe..08e5678b2d 100755 --- a/ts_scripts/install_utils +++ b/ts_scripts/install_utils @@ -25,14 +25,16 @@ install_java_deps() set -e } -install_torch_deps() -{ - if is_gpu_instance && [ ! -z "$1" ]; +install_torch_deps() { + if [ ! -z "$1" ]; then - pip install -U -r requirements_$1.txt -f https://download.pytorch.org/whl/torch_stable.html - else - pip install -U -r requirements.txt - fi + if [[ "$1" == *"cu"* || "$1" == *"rocm"* ]] && ! is_gpu_instance; + then + echo "Cannot install GPU-specific requirements." 
+ exit 1 + fi + pip install -U -r requirements/$1.txt + fi } install_pytest_suite_deps() @@ -275,20 +277,15 @@ clean_up_build_residuals() rm -rf ts/utils/__pycache__/ } -is_gpu_instance(){ - if command -v nvidia-smi; - then - nvidia-smi | grep 'NVIDIA-SMI has failed' - if [ $? == 0 ]; - then - return 1 - else +is_gpu_instance() { + for cmd in nvidia-smi amd-smi system_profiler xpu-smi; do + if command -v "$cmd" && "$cmd" > /dev/null 2>&1; then + echo "$cmd found and able to communicate with GPU(s)." return 0 fi - - else - return 1 - fi + done + echo "Cannot communicate with GPU(s)." + return 1 } run_markdown_link_checker(){ @@ -303,4 +300,4 @@ run_markdown_link_checker(){ done set -e exit $STATUS -} \ No newline at end of file +} diff --git a/ts_scripts/print_env_info.py b/ts_scripts/print_env_info.py index 0e74a61661..2d3fc059ae 100644 --- a/ts_scripts/print_env_info.py +++ b/ts_scripts/print_env_info.py @@ -13,6 +13,7 @@ except (ImportError, NameError, AttributeError): TORCH_AVAILABLE = False + torchserve_env = { "torch": "**Warning: torch not present ..", "torch_model_archiver": "**Warning: torch-model-archiver not installed ..", @@ -38,7 +39,16 @@ "cuda_runtime_version": "N/A", "nvidia_gpu_models": [], "nvidia_driver_version": "N/A", - "cudnn_version": [], + "nvidia_driver_cuda_version": "N/A", + "cudnn_version": "N/A", +} + +hip_env = { + "is_hip_available": "No", + "hip_runtime_version": "N/A", + "amd_gpu_models": [], + "rocm_version": "N/A", + "miopen_version": "N/A", } npm_env = {"npm_pkg_version": []} @@ -46,14 +56,6 @@ cpp_env = {"LIBRARY_PATH": ""} -def get_nvidia_smi(): - # Note: nvidia-smi is currently available only on Windows and Linux - smi = "nvidia-smi" - if get_platform() == "win32": - smi = "nvidia-smi.exe" - return smi - - def run(command): """Returns (return-code, stdout, stderr)""" p = subprocess.Popen( @@ -110,7 +112,7 @@ def run_with_pip(pip): elif package_name == "torch": grep_cmd = 'grep "' + package_name + '"' else: - grep_cmd = r'grep "numpy\|pytest\|pylint\|transformers\|psutil\|wheel\|requests\|sentencepiece\|pillow\|captum\|nvgpu\|pygit2\|torch"' + grep_cmd = r'grep "numpy\|pytest\|pylint\|transformers\|psutil\|wheel\|requests\|sentencepiece\|pillow\|captum\|pygit2\|torch"' return run_and_read_all(pip + " list --format=freeze | " + grep_cmd) out = run_with_pip("pip3") @@ -197,65 +199,58 @@ def get_cmake_version(): return run_and_parse_first_match("cmake --version", r"cmake (.*)") +def get_gpu_info(): + num_of_gpus = torch.cuda.device_count() + gpu_types = [ + torch.cuda.get_device_name(gpu_index) for gpu_index in range(num_of_gpus) + ] + return "\n".join(["", *gpu_types]) + + def get_nvidia_driver_version(): - smi = get_nvidia_smi() - if get_platform() == "darwin": - cmd = "kextstat | grep -i cuda" - return run_and_parse_first_match(cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]") + # Local import because ts_scripts/install_dependencies.py + # imports a function from this module at a stage when pynvml is not yet installed + import pynvml - return run_and_parse_first_match(smi, r"Driver Version: (.*?) 
") + pynvml.nvmlInit() + driver_version = pynvml.nvmlSystemGetDriverVersion() + pynvml.nvmlShutdown() + return driver_version -def get_nvidia_gpu_info(): - smi = get_nvidia_smi() - if get_platform() == "darwin": - if TORCH_AVAILABLE and torch.cuda.is_available(): - return torch.cuda.get_device_name(None) - return None - uuid_regex = re.compile(r" \(UUID: .+?\)") - rc, out, _ = run(smi + " -L") - if rc != 0: - return None - # Anonymize GPUs by removing their UUID - return "\n" + re.sub(uuid_regex, "", out) +def get_nvidia_driver_cuda_version(): + # Local import because ts_scripts/install_dependencies.py + # imports a function from this module at a stage when pynvml is not yet installed + import pynvml + + pynvml.nvmlInit() + cuda = pynvml.nvmlSystemGetCudaDriverVersion() + cuda_major = cuda // 1000 + cuda_minor = (cuda % 1000) // 10 + pynvml.nvmlShutdown() + return f"{cuda_major}.{cuda_minor}" def get_running_cuda_version(): - return run_and_parse_first_match("nvcc --version", r"V([\d.]+)") + cuda = torch._C._cuda_getCompiledVersion() + cuda_major = cuda // 1000 + cuda_minor = (cuda % 1000) // 10 + return f"{cuda_major}.{cuda_minor}" def get_cudnn_version(): - """This will return a list of libcudnn.so; it's hard to tell which one is being used""" - if get_platform() == "win32": - system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") - cuda_path = os.environ.get("CUDA_PATH", "%CUDA_PATH%") - where_cmd = os.path.join(system_root, "System32", "where") - cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) - elif get_platform() == "darwin": - # CUDA libraries and drivers can be found in /usr/local/cuda/. See - cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*" - else: - cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' - rc, out, _ = run(cudnn_cmd) - # find will return 1 if there are permission errors or if not found - if len(out) == 0 or (rc != 1 and rc != 0): - l = os.environ.get("CUDNN_LIBRARY") - if l is not None and os.path.isfile(l): - return os.path.realpath(l) - return None - files = set() - for fn in out.split("\n"): - fn = os.path.realpath(fn) # eliminate symbolic links - if os.path.isfile(fn): - files.add(fn) - if not files: - return None - # Alphabetize the result because the order is non-deterministic otherwise - files = list(sorted(files)) - if len(files) == 1: - return files[0] - result = "\n".join(files) - return "Probably one of the following:\n{}".format(result) + cudnn = torch.backends.cudnn.version() + cudnn_major = cudnn // 10000 + cudnn = cudnn % 10000 + cudnn_minor = cudnn // 100 + cudnn_patch = cudnn % 100 + return f"{cudnn_major}.{cudnn_minor}.{cudnn_patch}" + + +def get_miopen_version(): + cfg = torch._C._show_config() + miopen = re.search("MIOpen \d+\.\d+\.\d+", cfg).group() + return miopen.split(" ")[1] def get_torchserve_version(): @@ -341,11 +336,22 @@ def populate_os_env(): def populate_cuda_env(cuda_available_str): cuda_env["is_cuda_available"] = cuda_available_str cuda_env["cuda_runtime_version"] = get_running_cuda_version() - cuda_env["nvidia_gpu_models"] = get_nvidia_gpu_info() + cuda_env["nvidia_gpu_models"] = get_gpu_info() cuda_env["nvidia_driver_version"] = get_nvidia_driver_version() + cuda_env["nvidia_driver_cuda_version"] = get_nvidia_driver_cuda_version() cuda_env["cudnn_version"] = get_cudnn_version() +def populate_hip_env(hip_available_str): + hip_env["is_hip_available"] = hip_available_str + hip_env["hip_runtime_version"] = torch.version.hip + hip_env["amd_gpu_models"] = get_gpu_info() + hip_env["rocm_version"] = 
run_and_parse_first_match( + "amd-smi version", r"ROCm version: ([\d.]+)" + ) + hip_env["miopen_version"] = get_miopen_version() + + def populate_npm_env(): npm_env["npm_pkg_version"] = get_npm_packages() @@ -371,8 +377,10 @@ def populate_env_info(): populate_os_env() # cuda environment - if TORCH_AVAILABLE and torch.cuda.is_available(): + if TORCH_AVAILABLE and torch.cuda.is_available() and torch.version.cuda: populate_cuda_env("Yes") + elif TORCH_AVAILABLE and torch.cuda.is_available() and torch.version.hip: + populate_hip_env("Yes") if get_platform() == "darwin": populate_npm_env() @@ -412,11 +420,20 @@ def populate_env_info(): cuda_info_fmt = """ Is CUDA available: {is_cuda_available} CUDA runtime version: {cuda_runtime_version} -GPU models and configuration: {nvidia_gpu_models} +NVIDIA GPU models and configuration: {nvidia_gpu_models} Nvidia driver version: {nvidia_driver_version} +Nvidia driver cuda version: {nvidia_driver_cuda_version} cuDNN version: {cudnn_version} """ +hip_info_fmt = """ +Is HIP available: {is_hip_available} +HIP runtime version: {hip_runtime_version} +AMD GPU models and configuration: {amd_gpu_models} +ROCm version: {rocm_version} +MIOpen version: {miopen_version} +""" + npm_info_fmt = """ Versions of npm installed packages: {npm_pkg_version} @@ -431,6 +448,7 @@ def populate_env_info(): def get_pretty_env_info(branch_name): global env_info_fmt global cuda_info_fmt + global hip_info_fmt global npm_info_fmt global cpp_env_info_fmt populate_env_info() @@ -443,9 +461,12 @@ def get_pretty_env_info(branch_name): **cpp_env, } - if TORCH_AVAILABLE and torch.cuda.is_available(): + if TORCH_AVAILABLE and torch.cuda.is_available() and torch.version.cuda: env_dict.update(cuda_env) env_info_fmt = env_info_fmt + "\n" + cuda_info_fmt + elif TORCH_AVAILABLE and torch.cuda.is_available() and torch.version.hip: + env_dict.update(hip_env) + env_info_fmt = env_info_fmt + "\n" + hip_info_fmt if get_platform() == "darwin": env_dict.update(npm_env) diff --git a/ts_scripts/sanity_utils.py b/ts_scripts/sanity_utils.py index f6b5126213..1c4329296f 100755 --- a/ts_scripts/sanity_utils.py +++ b/ts_scripts/sanity_utils.py @@ -8,10 +8,6 @@ import torch -from ts_scripts import marsgen as mg -from ts_scripts import tsutils as ts -from ts_scripts import utils - REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") sys.path.append(REPO_ROOT) MODELS_CONFIG_FILE_PATH = Path(__file__).parent.joinpath( @@ -19,6 +15,11 @@ ) +from ts_scripts import marsgen as mg +from ts_scripts import tsutils as ts +from ts_scripts import utils + + async def markdown_link_checker(in_queue, out_queue, n): print(f"worker started {n}") while True: @@ -75,15 +76,23 @@ def run_markdown_link_checker(): def validate_model_on_gpu(): # A quick \ crude way of checking if model is loaded in GPU # Assumption is - - # 1. GPUs on test setup are only utlizied by torchserve + # 1. GPUs on test setup are only utilized by torchserve # 2. 
Models are successfully UNregistered between subsequent calls - import nvgpu - model_loaded = False - for info in nvgpu.gpu_info(): - if info["mem_used"] > 0 and info["mem_used_percent"] > 0.0: + + if torch.cuda.is_available() and not (torch.version.cuda or torch.version.hip): + return model_loaded + + num_of_gpus = torch.cuda.device_count() + + for gpu_index in range(num_of_gpus): + free, total = torch.cuda.mem_get_info(gpu_index) + mem_used = (total - free) // 1024**2 + mem_used_percent = 100.0 * (1 - (free / total)) + + if mem_used > 0 and mem_used_percent > 0.0: model_loaded = True - break + return model_loaded @@ -218,7 +227,6 @@ def test_sanity(): def test_workflow_sanity(): - current_path = os.getcwd() ts_log_file = os.path.join("logs", "ts_console.log") os.makedirs("model_store", exist_ok=True) os.makedirs("logs", exist_ok=True) @@ -237,7 +245,7 @@ def test_workflow_sanity(): if response and response.status_code == 200: print(response.text) else: - print(f"## Failed to register workflow") + print("## Failed to register workflow") sys.exit(1) # Run prediction on workflow @@ -254,7 +262,7 @@ def test_workflow_sanity(): if response and response.status_code == 200: print(response.text) else: - print(f"## Failed to unregister workflow") + print("## Failed to unregister workflow") sys.exit(1) stopped = ts.stop_torchserve() diff --git a/ts_scripts/utils.py b/ts_scripts/utils.py index de755b8d2d..7fdbce68de 100644 --- a/ts_scripts/utils.py +++ b/ts_scripts/utils.py @@ -6,15 +6,25 @@ REPO_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..") sys.path.append(REPO_ROOT) + nvidia_smi_cmd = { "Windows": "nvidia-smi.exe", "Darwin": "nvidia-smi", "Linux": "nvidia-smi", } +amd_smi_cmd = { + "Linux": "amd-smi", +} + def is_gpu_instance(): - return True if os.system(nvidia_smi_cmd[platform.system()]) == 0 else False + return ( + True + if os.system(nvidia_smi_cmd[platform.system()]) == 0 + or os.system(amd_smi_cmd[platform.system()]) == 0 + else False + ) def is_conda_build_env(): diff --git a/ts_scripts/validate_model_on_gpu.py b/ts_scripts/validate_model_on_gpu.py index 733fda16db..3ee1014f71 100644 --- a/ts_scripts/validate_model_on_gpu.py +++ b/ts_scripts/validate_model_on_gpu.py @@ -1,13 +1,6 @@ -import nvgpu +from sanity_utils import validate_model_on_gpu -gpu_info = nvgpu.gpu_info() - -model_loaded = False - -for info in gpu_info: - if info['mem_used'] > 0 and info['mem_used_percent'] > 0.0: - model_loaded = True - break +model_loaded = validate_model_on_gpu() if not model_loaded: exit(1)
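
A quick way to confirm which backend the updated scripts will take, assuming PyTorch was installed through one of the new --rocm requirements files (for example --rocm=rocm62): the changed code paths in print_env_info.py, system_metrics.py and base_handler.py all branch on torch.version.cuda versus torch.version.hip, so a correctly installed ROCm build should report a HIP version string and no CUDA version. A minimal sketch of that check:

import torch

print("cuda available:", torch.cuda.is_available())    # True for both CUDA and ROCm builds
print("torch.version.cuda:", torch.version.cuda)        # None on a ROCm build
print("torch.version.hip:", torch.version.hip)          # version string on ROCm, None on a CUDA build
for gpu_index in range(torch.cuda.device_count()):
    print(f"device {gpu_index}:", torch.cuda.get_device_name(gpu_index))

On an AMD host this mirrors the conditions populate_hip_env() and collect_gpu_metrics() test before taking the ROCm path.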
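
The per-GPU metrics added to ts/metrics/system_metrics.py are built from the amdsmi calls shown in the diff above. The standalone sketch below is an illustration only; it assumes the ROCm amdsmi Python package is installed and at least one AMD GPU is visible to the driver, and it prints the raw values that feed GPUUtilization, GPUMemoryUtilization and GPUMemoryUsed:

import amdsmi

try:
    amdsmi.amdsmi_init()
    for gpu_index, handle in enumerate(amdsmi.amdsmi_get_processor_handles()):
        # VRAM usage in MB, plus per-engine activity percentages
        vram_used = amdsmi.amdsmi_get_gpu_vram_usage(handle)["vram_used"]
        activity = amdsmi.amdsmi_get_gpu_activity(handle)
        print(
            f"gpu{gpu_index}: gfx={activity['gfx_activity']}% "
            f"umc={activity['umc_activity']}% vram_used={vram_used} MB"
        )
except amdsmi.AmdSmiException as exc:
    print(f"amd-smi query failed: {exc}")
finally:
    try:
        amdsmi.amdsmi_shut_down()
    except amdsmi.AmdSmiException:
        pass

Unlike the in-tree collector, which initialises and shuts down amdsmi once per GPU index, the sketch keeps a single amdsmi session open while iterating over all device handles.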