diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6c1361b46..e57994633 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,108 +32,61 @@ on: release: types: [published] - -env: - HOMEBREW_NO_ANALYTICS: "ON" # Make Homebrew installation a little quicker - HOMEBREW_NO_AUTO_UPDATE: "ON" - HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON" - HOMEBREW_NO_GITHUB_API: "ON" - HOMEBREW_NO_INSTALL_CLEANUP: "ON" - CIBW_SKIP: "pp* *i686*" # skip building for PyPy - CIBW_ARCHS_MACOS: x86_64 - CIBW_ARCHS_LINUX: x86_64 # ppc64le # uncomment to enable powerPC build - CIBW_ENVIRONMENT_MACOS: PATH="$(brew --prefix)/opt/make/libexec/gnubin:$PATH" - MACOSX_DEPLOYMENT_TARGET: "10.09" - - jobs: - build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-22.04, macos-12] - + build_dists: + name: Build Distributions + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 + with: + python-version: '3.9' - - name: Install cibuildwheel - run: python -m pip install cibuildwheel>=2.12.3 + - name: Install build + run: python -m pip install 'build>=1.2.2,<2' - name: Install build-essentials - if: contains(matrix.os, 'ubuntu') run: | sudo add-apt-repository ppa:ubuntu-toolchain-r/test sudo apt-get update - sudo apt-get install -y build-essential - sudo apt-get install -y wget + sudo apt-get install -y build-essential wget - - name: Install GNU make for MacOS - if: contains(matrix.os, 'macos') - run: brew install make || true + - name: Build Distributions + run: python -m build . - - name: list target wheels - run: | - python -m cibuildwheel . 
--print-build-identifiers - - - name: Build wheels - run: python -m cibuildwheel --output-dir wheelhouse - env: - CIBW_ENVIRONMENT_MACOS: PATH="$(brew --prefix)/opt/make/libexec/gnubin:$PATH" - MACOSX_DEPLOYMENT_TARGET: "10.09" - - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: - path: ./wheelhouse/*.whl - - - build_sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - name: Install Python - with: - python-version: '3.9' - - - name: Build sdist - run: | - python -m pip install cmake>=3.13 - python setup.py sdist - - - uses: actions/upload-artifact@v2 - with: - path: dist/*.tar.gz + name: distributables + path: ./dist/* upload_pypi: - needs: [build_wheels, build_sdist] - runs-on: ubuntu-latest + needs: [build_dists] + runs-on: ubuntu-22.04 steps: - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: - name: artifact + name: distributables path: dist - uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI }} - #repository_url: https://test.pypi.org/legacy/ - + # repository-url: https://test.pypi.org/legacy/ createPullRequest: - runs-on: ubuntu-latest + needs: [upload_pypi] + runs-on: ubuntu-22.04 steps: - name: Checkout code uses: actions/checkout@v4 - name: Create pull request run: | - gh pr create -B develop -H master --title 'Merge master into develop' --body 'This PR brings develop up to date with master for release.' + gh pr create -B develop \ + -H master \ + --title 'Merge master into develop' \ + --body 'This PR brings develop up to date with master for release.' 
env: GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 2e3463e5b..e3c808410 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -45,10 +45,11 @@ env: HOMEBREW_NO_GITHUB_API: "ON" HOMEBREW_NO_INSTALL_CLEANUP: "ON" DEBIAN_FRONTEND: "noninteractive" # Disable interactive apt install sessions + GIT_CLONE_PROTECTION_ACTIVE: false jobs: run_tests: - name: Run tests ${{ matrix.subset }} with ${{ matrix.os }}, Python ${{ matrix.py_v}}, RedisAI ${{ matrix.rai }} + name: Run tests ${{ matrix.subset }} with ${{ matrix.os }}, Python ${{ matrix.py_v}} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -62,9 +63,6 @@ jobs: - os: macos-14 py_v: "3.9" - env: - SMARTSIM_REDISAI: ${{ matrix.rai }} - steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -108,19 +106,13 @@ jobs: - name: Install SmartSim (with ML backends) run: | python -m pip install git+https://github.com/CrayLabs/SmartRedis.git@develop#egg=smartredis - python -m pip install .[dev,ml] - - - name: Install ML Runtimes with Smart (with pt, tf, and onnx support) - if: contains( matrix.os, 'ubuntu' ) || contains( matrix.os, 'macos-12') - run: smart build --device cpu --onnx -v + python -m pip install .[dev,mypy] - - name: Install ML Runtimes with Smart (no ONNX,TF on Apple Silicon) - if: contains( matrix.os, 'macos-14' ) - run: smart build --device cpu --no_tf -v + - name: Install ML Runtimes + run: smart build --device cpu -v - name: Run mypy run: | - python -m pip install .[mypy] make check-mypy - name: Run Pylint @@ -164,7 +156,7 @@ jobs: retention-days: 5 - name: Upload Pytest coverage to Codecov - uses: codecov/codecov-action@v3.1.4 + uses: codecov/codecov-action@v4.5.0 with: fail_ci_if_error: false files: ./coverage.xml diff --git a/.gitignore b/.gitignore index 77b91d586..97132aff7 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ tests/test_output # Dependencies 
smartsim/_core/.third-party smartsim/_core/.dragon +smartsim/_core/build # Docs _build diff --git a/.readthedocs.yaml b/.readthedocs.yaml index cecdfe3bf..88f270ba7 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -23,7 +23,7 @@ build: - git clone --depth 1 https://github.com/CrayLabs/SmartRedis.git smartredis - git clone --depth 1 https://github.com/CrayLabs/SmartDashboard.git smartdashboard post_create_environment: - - python -m pip install .[dev] + - python -m pip install .[dev,docs] - cd smartredis; python -m pip install . - cd smartredis/doc; doxygen Doxyfile_c; doxygen Doxyfile_cpp; doxygen Doxyfile_fortran - ln -s smartredis/examples ./examples @@ -37,7 +37,3 @@ build: sphinx: configuration: doc/conf.py fail_on_warning: true - -python: - install: - - requirements: doc/requirements-doc.txt \ No newline at end of file diff --git a/.wci.yml b/.wci.yml index 6194f1939..cf53334c3 100644 --- a/.wci.yml +++ b/.wci.yml @@ -22,8 +22,8 @@ language: Python release: - version: 0.7.0 - date: 2024-05-14 + version: 0.8.0 + date: 2024-09-25 documentation: general: https://www.craylabs.org/docs/overview.html diff --git a/Makefile b/Makefile index bddbda722..457bb040a 100644 --- a/Makefile +++ b/Makefile @@ -150,11 +150,11 @@ tutorials-dev: @docker compose build tutorials-dev @docker run -p 8888:8888 smartsim-tutorials:dev-latest -# help: tutorials-prod - Build and start a docker container to run the tutorials (v0.7.0) +# help: tutorials-prod - Build and start a docker container to run the tutorials (v0.8.0) .PHONY: tutorials-prod tutorials-prod: @docker compose build tutorials-prod - @docker run -p 8888:8888 smartsim-tutorials:v0.7.0 + @docker run -p 8888:8888 smartsim-tutorials:v0.8.0 # help: diff --git a/README.md b/README.md index c0986042e..610d6608c 100644 --- a/README.md +++ b/README.md @@ -643,11 +643,11 @@ from C, C++, Fortran and Python with the SmartRedis Clients: 1.2.7 PyTorch - 2.0.1 + 2.1.0 TensorFlow\Keras - 2.13.1 + 2.15.0 ONNX diff --git 
a/conftest.py b/conftest.py index b0457522c..991c0d17b 100644 --- a/conftest.py +++ b/conftest.py @@ -120,7 +120,7 @@ def print_test_configuration() -> None: def pytest_configure() -> None: pytest.test_launcher = test_launcher - pytest.wlm_options = ["slurm", "pbs", "lsf", "pals", "dragon"] + pytest.wlm_options = ["slurm", "pbs", "lsf", "pals", "dragon", "sge"] account = get_account() pytest.test_account = account pytest.test_device = test_device diff --git a/doc/_static/version_names.json b/doc/_static/version_names.json index bc095f84a..8b127e586 100644 --- a/doc/_static/version_names.json +++ b/doc/_static/version_names.json @@ -1,7 +1,8 @@ { "version_names":[ "develop (unstable)", - "0.7.0 (stable)", + "0.8.0 (stable)", + "0.7.0", "0.6.2", "0.6.1", "0.6.0", @@ -15,6 +16,7 @@ "version_urls": [ "https://www.craylabs.org/develop/overview.html", "https://www.craylabs.org/docs/overview.html", + "https://www.craylabs.org/docs/versions/0.7.0/overview.html", "https://www.craylabs.org/docs/versions/0.6.2/overview.html", "https://www.craylabs.org/docs/versions/0.6.1/overview.html", "https://www.craylabs.org/docs/versions/0.6.0/overview.html", diff --git a/doc/changelog.md b/doc/changelog.md index 73ea36511..179f4cf26 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -9,12 +9,128 @@ Jump to: ## SmartSim +### 0.8.0 + +Released on 27 September, 2024 + +Description + +- Add instructions for Frontier to set the MIOPEN cache +- Refine Frontier documentation for proper use of miniforge3 +- Refactor to the RedisAI build to allow more flexibility in versions + and sources of ML backends +- Add Dockerfiles with GPU support +- Fine grain build support for GPUs +- Update Torch to 2.1.0, Tensorflow to 2.15.0 +- Better error messages in build process +- Allow specifying Model and Ensemble parameters with + number-like types (e.g. 
numpy types) +- Pin watchdog to 4.x +- Update codecov to 4.5.0 +- Remove build of Redis from setup.py +- Mitigate dependency installation issues +- Fix internal host name representation for Dragon backend +- Make dependencies more discoverable in setup.py +- Add hardware pinning capability when using dragon +- Pin NumPy version to 1.x +- New launcher support for SGE (and similar derivatives) +- Fix test outputs being created in incorrect directory +- Improve support for building SmartSim without ML backends +- Update packaging dependency +- Remove broken oss.redis.com URI blocking documentation generation + +Detailed Notes + +- On Frontier, the MIOPEN cache may need to be set prior to using + RedisAI in the ``smart validate``. The instructions for Frontier + have been updated accordingly. + ([SmartSim-PR727](https://github.com/CrayLabs/SmartSim/pull/727)) +- On Frontier, the recommended way to activate conda environments is + to go through source activate. This also means that ``conda init`` + is not needed. The instructions for Frontier have been updated to + reflect this. + ([SmartSim-PR719](https://github.com/CrayLabs/SmartSim/pull/719)) +- The RedisAIBuilder class was completely overhauled to allow users to + express a wider range of support for hardware/software stacks. This + will be extended to support ROCm, CUDA-11, and CUDA-12. + ([SmartSim-PR669](https://github.com/CrayLabs/SmartSim/pull/669)) +- Versions for each of these packages are no longer specified in an + internal class. Instead a default set of JSON files specifies the + sources and versions. Users can specify their own custom specifications + at smart build time. + ([SmartSim-PR669](https://github.com/CrayLabs/SmartSim/pull/669)) +- Because all build configuration has been moved to static files and all + backends are compiled during `smart build`, SmartSim can now be shipped as a + pure python wheel. 
+ ([SmartSim-PR728](https://github.com/CrayLabs/SmartSim/pull/728)) +- Two new Dockerfiles are now provided (one each for 11.8 and 12.1) that + can be used to build a container to run the tutorials. No HPC support + should be expected at this time + ([SmartSim-PR669](https://github.com/CrayLabs/SmartSim/pull/669)) +- As a result of the previous change, SmartSim now requires C++17 and a + minimum Cuda version of 11.8 in order to build Torch 2.1.0. + ([SmartSim-PR669](https://github.com/CrayLabs/SmartSim/pull/669)) +- Error messages were not being interpolated correctly. This has been + addressed to provide more context when exposing error messages to users. + ([SmartSim-PR669](https://github.com/CrayLabs/SmartSim/pull/669)) +- The serializer would fail if a parameter for a Model or Ensemble + was specified as a numpy dtype. The constructors for these + methods now validate that the input is number-like and convert + them to strings + ([SmartSim-PR676](https://github.com/CrayLabs/SmartSim/pull/676)) +- Pin watchdog to 4.x because v5 introduces new types and requires + updates to the type-checking + ([SmartSim-PR690](https://github.com/CrayLabs/SmartSim/pull/690)) +- Update codecov to 4.5.0 to mitigate GitHub action failure + ([SmartSim-PR657](https://github.com/CrayLabs/SmartSim/pull/657)) +- The builder module was included in setup.py to allow us to ship the + main Redis binaries (not RedisAI) with installs from PyPI. To + allow easier maintenance of this file and enable future complexity + this has been removed. The Redis binaries will thus be built + by users during the `smart build` step +- Installation of mypy or dragon in separate build actions caused + some dependencies (typing_extensions, numpy) to be upgraded and + caused runtime failures. The build actions were tweaked to include + all optional dependencies to be considered by pip during resolution. + Additionally, the numpy version was capped on dragon installations. 
+ ([SmartSim-PR653](https://github.com/CrayLabs/SmartSim/pull/653)) +- setup.py used to define dependencies in a way that was not amenable + to code scanning tools. Direct dependencies now appear directly + in the setup call and the definition of the SmartRedis version + has been removed + ([SmartSim-PR635](https://github.com/CrayLabs/SmartSim/pull/635)) +- The separate definition of dependencies for the docs in + requirements-doc.txt is now defined as an extra. + ([SmartSim-PR635](https://github.com/CrayLabs/SmartSim/pull/635)) +- The new major version release of Numpy is incompatible with modules + compiled against Numpy 1.x. For both SmartSim and SmartRedis we + request a 1.x version of numpy. This is needed in SmartSim because + some of the downstream dependencies request NumPy + ([SmartSim-PR623](https://github.com/CrayLabs/SmartSim/pull/623)) +- SGE is now a supported launcher for SmartSim. Users can now define + BatchSettings which will be monitored by the TaskManager. Additionally, + if the MPI implementation was built with SGE support, Orchestrators can + use `mpirun` without needing to specify the hosts + ([SmartSim-PR610](https://github.com/CrayLabs/SmartSim/pull/610)) +- Ensure outputs from tests are written to temporary `tests/test_output` directory +- Fix an error that would prevent ``smart build`` from moving a successfully + compiled RedisAI shared object to the install location expected by SmartSim + if no ML backend installations were found. Previously, this would effectively + require users to build and install an ML backend to use the SmartSim + orchestrator even if it was not necessary for their workflow. Users can + install SmartSim without ML backends by running + ``smart build --no_tf --no_pt`` and the RedisAI shared object will now be + placed in the expected location. + ([SmartSim-PR601](https://github.com/CrayLabs/SmartSim/pull/601)) +- Fix packaging failures due to deprecated `pkg_resources`. 
([SmartSim-PR598](https://github.com/CrayLabs/SmartSim/pull/598)) + ### 0.7.0 Released on 14 May, 2024 Description +- Update tutorials and tutorial containers - Improve Dragon server shutdown - Add dragon runtime installer - Add launcher based on Dragon @@ -64,6 +180,8 @@ Description Detailed Notes +- The tutorials are up to date with SmartSim and SmartRedis APIs. Additionally, + the tutorial containers' Docker files are updated. ([SmartSim-PR589](https://github.com/CrayLabs/SmartSim/pull/589)) - The Dragon server will now terminate any process which is still running when a request of an immediate shutdown is sent. ([SmartSim-PR582](https://github.com/CrayLabs/SmartSim/pull/582)) - Add `--dragon` option to `smart build`. Install appropriate Dragon diff --git a/doc/conf.py b/doc/conf.py index 932bce013..8f3a9ca63 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -29,7 +29,7 @@ import smartsim version = smartsim.__version__ except ImportError: - version = "0.7.0" + version = "0.8.0" # The full version, including alpha/beta/rc tags release = version diff --git a/doc/dragon.rst b/doc/dragon.rst index 0bf6a8ea3..e19b40e4b 100644 --- a/doc/dragon.rst +++ b/doc/dragon.rst @@ -65,6 +65,34 @@ In the next sections, we detail how Dragon is integrated into SmartSim. For more information on HPC launchers, visit the :ref:`Run Settings` page. +Hardware Pinning +================ + +Dragon also enables users to specify hardware constraints using ``DragonRunSettings``. CPU +and GPU affinity can be specified using the ``DragonRunSettings`` object. The following +example demonstrates how to specify CPU affinity and GPU affinities simultaneously. Note +that affinities are passed as a list of device indices. + +.. 
code-block:: python + + # Because "dragon" was specified as the launcher during Experiment initialization, + # create_run_settings will return a DragonRunSettings object + rs = exp.create_run_settings(exe="mpi_app", + exe_args=["--option", "value"], + env_vars={"MYVAR": "VALUE"}) + + # Request the first 8 CPUs for this job + rs.set_cpu_affinity(list(range(8))) + + # Request the first two GPUs on the node for this job + rs.set_gpu_affinity([0, 1]) + +.. note:: + + SmartSim launches jobs in the order they are received on the first available + host in a round-robin pattern. To ensure a process is launched on a node with + specific features, configure a hostname constraint. + ================= The Dragon Server ================= diff --git a/doc/installation_instructions/basic.rst b/doc/installation_instructions/basic.rst index 02c17e1fd..226ccb085 100644 --- a/doc/installation_instructions/basic.rst +++ b/doc/installation_instructions/basic.rst @@ -18,7 +18,7 @@ Prerequisites Basic ===== -The base prerequisites to install SmartSim and SmartRedis are: +The base prerequisites to install SmartSim and SmartRedis with CPU-only support are: - Python 3.9-3.11 - Pip @@ -27,13 +27,11 @@ The base prerequisites to install SmartSim and SmartRedis are: - C++ compiler - GNU Make > 4.0 - git - - `git-lfs`_ - -.. _git-lfs: https://github.com/git-lfs/git-lfs?utm_source=gitlfs_site&utm_medium=installation_link&utm_campaign=gitlfs .. note:: - GCC 5-9, 11, and 12 is recommended. There are known bugs with GCC 10. + GCC 9, 11-13 is recommended (there are known issues compiling with GCC 10). For + CUDA 11.8, GCC 9 or 11 must be used. .. warning:: @@ -43,66 +41,146 @@ The base prerequisites to install SmartSim and SmartRedis are: `which gcc g++` do not point to Apple Clang. 
-GPU Support -=========== +ML Library Support +================== -The machine-learning backends have additional requirements in order to -use GPUs for inference +We currently support both Nvidia and AMD GPUs when using RedisAI for GPU inference. The support +for these GPUs often depends on the version of the CUDA or ROCm stack that is available on your +machine. In *most* cases, the versions are backwards compatible. If you encounter problems, please +contact us and we can build the backend libraries for your desired version of CUDA and ROCm. - - `CUDA Toolkit 11 (tested with 11.8) `_ - - `cuDNN 8 (tested with 8.9.1) `_ - - OS: Linux - - GPU: Nvidia +CPU backends are provided for Apple (both Intel and Apple Silicon) and Linux (x86_64). -Be sure to reference the :ref:`installation notes ` for helpful +Be sure to reference the table below to find which versions of the ML libraries are supported for +your particular platform. Additionally, see :ref:`installation notes ` for helpful information regarding various system types before installation. -================== -Supported Versions -================== +Linux +----- +.. tabs:: -.. list-table:: Supported System for Pre-built Wheels - :widths: 50 50 50 50 - :header-rows: 1 - :align: center + .. group-tab:: CUDA 11 + + Additional requirements: + + * GCC <= 11 + * CUDA Toolkit 11.7 or 11.8 + * cuDNN 8.9 + + .. list-table:: Nvidia CUDA 11 + :widths: 50 50 50 50 + :header-rows: 1 + :align: center + + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.3.1 + - 2.14.1 + - 1.17.3 + + .. group-tab:: CUDA 12 + + Additional requirements: + + * CUDA Toolkit 12 + * cuDNN 8.9 + + .. list-table:: Nvidia CUDA 12 + :widths: 50 50 50 50 + :header-rows: 1 + :align: center + + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.3.1 + - 2.17 + - 1.17.3 + + .. group-tab:: ROCm 6 + + .. 
list-table:: AMD ROCm 6.1 + :widths: 50 50 50 50 + :header-rows: 1 + :align: center + + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.4.1 + - N/A + - N/A + + .. group-tab:: CPU + + .. list-table:: CPU-only + :widths: 50 50 50 50 + :header-rows: 1 + :align: center + + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.4.0 + - 2.15 + - 1.17.3 + +MacOSX +------ - * - Platform - - CPU - - GPU - - Python Versions - * - MacOS - - x86_64, aarch64 - - Not supported - - 3.9 - 3.11 - * - Linux - - x86_64 - - Nvidia - - 3.9 - 3.11 +.. tabs:: + .. group-tab:: Apple Silicon -.. note:: + .. list-table:: Apple Silicon ARM64 (no Metal support) + :widths: 50 50 50 50 + :header-rows: 1 + :align: center - Users have succesfully run SmartSim on Windows using Windows Subsystem for Linux - with Nvidia support. Generally, users should follow the Linux instructions here, - however we make no guarantee or offer of support. + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.4.0 + - 2.17 + - 1.17.3 + .. group-tab:: Intel Mac (x86) -Native support for various machine learning libraries and their -versions is dictated by our dependency on RedisAI_ 1.2.7. + .. list-table:: CPU-only + :widths: 50 50 50 50 + :header-rows: 1 + :align: center -+------------------+----------+-------------+---------------+ -| RedisAI | PyTorch | Tensorflow | ONNX Runtime | -+==================+==========+=============+===============+ -| 1.2.7 (default) | 2.0.1 | 2.13.1 | 1.16.3 | -+------------------+----------+-------------+---------------+ + * - Python Versions + - Torch + - Tensorflow + - ONNX Runtime + * - 3.9-3.11 + - 2.2.0 + - 2.15 + - 1.17.3 -.. warning:: - On Apple Silicon, only the PyTorch backend is supported for now. Please contact us - if you need support for other backends +.. note:: -TensorFlow_ 2.0 and Keras_ are supported through `graph freezing`_. 
+ Users have successfully run SmartSim on Windows using Windows Subsystem for Linux + with Nvidia support. Generally, users should follow the Linux instructions here, + however we make no guarantee or offer of support. + + +TensorFlow_ and Keras_ are supported through `graph freezing`_. ScikitLearn_ and Spark_ models are supported by SmartSim as well through the use of the ONNX_ runtime (which is not built by @@ -167,21 +245,8 @@ and install SmartSim from PyPI with the following command: pip install smartsim -If you would like SmartSim to also install python machine learning libraries -that can be used outside SmartSim to build SmartSim-compatible models, you -can request their installation through the ``[ml]`` optional dependencies, -as follows: - -.. code-block:: bash - - # For bash - pip install smartsim[ml] - # For zsh - pip install smartsim\[ml\] - -At this point, SmartSim is installed and can be used for more basic features. -If you want to use the machine learning features of SmartSim, you will need -to install the ML backends in the section below. +At this point, SmartSim can be used for describing and launching experiments, but +without any database/feature store functionality which allows for ML-enabled workflows. Step 2: Build SmartSim @@ -198,19 +263,19 @@ To see all the installation options: smart --help -CPU Install ------------ - -To install the default ML backends for CPU, run - .. code-block:: bash # run one of the following - smart build --device cpu # install PT and TF for cpu - smart build --device cpu --onnx # install all backends (PT, TF, ONNX) on cpu + smart build --device cpu # For unaccelerated AI/ML loads + smart build --device cuda118 # Nvidia Accelerator with CUDA 11.8 + smart build --device cuda125 # Nvidia Accelerator with CUDA 12.5 + smart build --device rocm57 # AMD Accelerator with ROCm 5.7.0 -By default, ``smart`` will install PyTorch and TensorFlow backends -for use in SmartSim. 
+By default, ``smart`` will install all backends available for the specified accelerator +*and* the compatible versions of the Python packages associated with the backends. To +disable support for a specific backend, ``smart build`` accepts the flags +``--skip-torch``, ``--skip-tensorflow``, ``--skip-onnx`` which can also be used in +combination. .. note:: @@ -218,19 +283,6 @@ for use in SmartSim. all of the previous installs for the ML backends and ``smart clobber`` will remove all pre-built dependencies as well as the ML backends. - -GPU Install ------------ - -With the proper environment setup (see :ref:`GPU support`) the only difference -to building SmartSim with GPU support is to specify a different ``device`` - -.. code-block:: bash - - # run one of the following - smart build --device gpu # install PT and TF for gpu - smart build --device gpu --onnx # install all backends (PT, TF, ONNX) on gpu - .. note:: GPU builds can be troublesome due to the way that RedisAI and the ML-package @@ -251,9 +303,7 @@ For example, to install dragon alongside the RedisAI CPU backends, you can run .. code-block:: bash - # run one of the following smart build --device cpu --dragon # install Dragon, PT and TF for cpu - smart build --device cpu --onnx --dragon # install Dragon and all backends (PT, TF, ONNX) on cpu .. note:: Dragon is only supported on Linux systems. For further information, you @@ -319,35 +369,11 @@ source remains at the site of the clone instead of in site-packages. .. code-block:: bash cd smartsim - pip install -e .[dev,ml] # for bash users - pip install -e .\[dev,ml\] # for zsh users - -Use the now installed ``smart`` cli to install the machine learning runtimes and dragon. - -.. tabs:: - - .. tab:: Linux - - .. code-block:: bash - - # run one of the following - smart build --device cpu --onnx --dragon # install with cpu-only support - smart build --device gpu --onnx --dragon # install with both cpu and gpu support - - - .. tab:: MacOS (Intel x64) - - .. 
code-block:: bash - - smart build --device cpu --onnx # install all backends (PT, TF, ONNX) on gpu - - - .. tab:: MacOS (Apple Silicon) - - .. code-block:: bash - - smart build --device cpu --no_tf # Only install PyTorch (TF/ONNX unsupported) + pip install -e .[dev] # for bash users + pip install -e ".[dev]" # for zsh users +Use the now installed ``smart`` cli to install the machine learning runtimes and +dragon. Refer to "Step 2: Build SmartSim" above. Build the SmartRedis library ============================ diff --git a/doc/installation_instructions/platform.rst b/doc/installation_instructions/platform.rst index 086fc2951..057a25d87 100644 --- a/doc/installation_instructions/platform.rst +++ b/doc/installation_instructions/platform.rst @@ -12,6 +12,8 @@ that SmartSim may be used on. .. include:: platform/frontier.rst +.. include:: platform/perlmutter.rst + .. include:: platform/cray.rst .. include:: platform/ncar-cheyenne.rst diff --git a/doc/installation_instructions/platform/frontier.rst b/doc/installation_instructions/platform/frontier.rst index e23856155..9b05061fe 100644 --- a/doc/installation_instructions/platform/frontier.rst +++ b/doc/installation_instructions/platform/frontier.rst @@ -1,23 +1,15 @@ OLCF Frontier ============= -Summary -------- - -Frontier is an AMD CPU/AMD GPU system. - -As of 2023-07-06, users can use the following instructions, however we -anticipate that all the SmartSim dependencies will be available system-wide via -the modules system. - Known limitations ----------------- We are continually working on getting all the features of SmartSim working on Frontier, however we do have some known limitations: -* For now, only Torch models are supported. We are working to find a recipe to - install Tensorflow with ROCm support from scratch +* For now, only Torch models are supported. 
If you need Tensorflow or ONNX + support please contact us +* All SmartSim experiments must be run from Lustre, *not* your home directory * The colocated database will fail without specifying ``custom_pinning``. This is because the default pinning assumes that processor 0 is available, but the 'low-noise' default on Frontier reserves the processor on each NUMA node. @@ -30,8 +22,8 @@ Frontier, however we do have some known limitations: Please raise an issue in the SmartSim Github or contact the developers if the above issues are affecting your workflow or if you find any other problems. -Build process -------------- +One-time Setup +-------------- To install the SmartRedis and SmartSim python packages on Frontier, please follow these instructions, being sure to set the following variables @@ -39,25 +31,20 @@ these instructions, being sure to set the following variables .. code:: bash export PROJECT_NAME=CHANGE_ME - export VENV_NAME=CHANGE_ME -Then continue with the install: +**Step 1:** Create and activate a virtual environment for SmartSim: .. code:: bash - module load PrgEnv-gnu-amd git-lfs cmake cray-python - module unload xalt amd-mixed - module load rocm/4.5.2 - export CC=gcc - export CXX=g++ + module load PrgEnv-gnu miniforge3 rocm/6.1.3 export SCRATCH=/lustre/orion/$PROJECT_NAME/scratch/$USER/ - export VENV_HOME=$SCRATCH/$VENV_NAME/ + conda create -n smartsim python=3.11 + source activate smartsim - python3 -m venv $VENV_HOME - source $VENV_HOME/bin/activate - pip install torch==1.11.0+rocm4.5.2 torchvision==0.12.0+rocm4.5.2 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/rocm4.5.2 +**Step 2:** Build the SmartRedis C++ and Fortran libraries: +.. code:: bash cd $SCRATCH git clone https://github.com/CrayLabs/SmartRedis.git @@ -65,57 +52,61 @@ Then continue with the install: make lib-with-fortran pip install . - # Download SmartSim and site-specific files +**Step 3:** Install SmartSim in the conda environment: + +.. 
code:: bash + cd $SCRATCH - git clone https://github.com/CrayLabs/site-deployments.git - git clone https://github.com/CrayLabs/SmartSim.git - cd SmartSim - pip install -e .[dev] + pip install git+https://github.com/CrayLabs/SmartSim.git -Next to finish the compilation, we need to manually modify one of the auxiliary -cmake files that comes packaged with Torch +**Step 4:** Build Redis, RedisAI, the backends, and all the Python packages: .. code:: bash - export TORCH_CMAKE_DIR=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)') - # Manual step: modify all references to the 'rocm' directory to rocm-4.5.2 - vim $TORCH_CMAKE_DIR/Caffe2/Caffe2Targets.cmake + smart build --device=rocm-6 -Finally, build Redis (or keydb for a more performant solution), RedisAI, and the -machine-learning backends using: +**Step 5:** Check that SmartSim has been installed and built correctly: .. code:: bash - KEYDB_FLAG="" # set this to --keydb if desired - smart build --device gpu --torch_dir $TORCH_CMAKE_DIR --no_tf -v $(KEYDB_FLAG) + # Optimizations for inference + export MIOPEN_USER_DB_PATH="/tmp/${USER}/my-miopen-cache" + export MIOPEN_CUSTOM_CACHE_DIR=$MIOPEN_USER_DB_PATH + rm -rf $MIOPEN_USER_DB_PATH + mkdir -p $MIOPEN_USER_DB_PATH + + # Run the install validation utility + smart validate --device gpu -Set up environment ------------------- +The following output indicates a successful install: + +.. code:: bash + + [SmartSim] INFO Verifying Tensor Transfer + [SmartSim] INFO Verifying Torch Backend + 16:26:35 login SmartSim[557020:MainThread] INFO Success! + +Post-installation +----------------- Before running SmartSim, the environment should match the one used to -build, and some variables should be set to work around some ROCm PyTorch -issues: +build, and some variables should be set to optimize performance: .. code:: bash # Set these to the same values that were used for install export PROJECT_NAME=CHANGE_ME - export VENV_NAME=CHANGE_ME .. 
code:: bash - module load PrgEnv-gnu-amd git-lfs cmake cray-python - module unload xalt amd-mixed - module load rocm/4.5.2 + module load PrgEnv-gnu miniforge3 rocm/6.1.3 + source activate smartsim - export SCRATCH=/lustre/orion/$PROJECT_NAME/scratch/$USER/ - export MIOPEN_USER_DB_PATH=/tmp/miopendb/ - export MIOPEN_SYSTEM_DB_PATH=$MIOPEN_USER_DB_PATH - mkdir -p $MIOPEN_USER_DB_PATH - export MIOPEN_DISABLE_CACHE=1 - export VENV_HOME=$SCRATCH/$VENV_NAME/ - source $VENV_HOME/bin/activate - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$VENV_HOME/lib/python3.9/site-packages/torch/lib + # Optimizations for inference + export MIOPEN_USER_DB_PATH="/tmp/${USER}/my-miopen-cache" + export MIOPEN_CUSTOM_CACHE_DIR=${MIOPEN_USER_DB_PATH} + rm -rf ${MIOPEN_USER_DB_PATH} + mkdir -p ${MIOPEN_USER_DB_PATH} Binding DBs to Slingshot ------------------------ @@ -129,17 +120,3 @@ following way: exp = Experiment("my_exp", launcher="slurm") orc = exp.create_database(db_nodes=3, interface=["hsn0","hsn1","hsn2","hsn3"], single_cmd=True) - -Running tests -------------- - -The same environment set to run SmartSim must be set to run tests. The -environment variables needed to run the test suite are the following: - -.. code:: bash - - export SMARTSIM_TEST_ACCOUNT=PROJECT_NAME # Change this to above - export SMARTSIM_TEST_LAUNCHER=slurm - export SMARTSIM_TEST_DEVICE=gpu - export SMARTSIM_TEST_PORT=6789 - export SMARTSIM_TEST_INTERFACE="hsn0,hsn1,hsn2,hsn3" diff --git a/doc/installation_instructions/platform/olcf-summit.rst b/doc/installation_instructions/platform/olcf-summit.rst index 236d15054..07be24eec 100644 --- a/doc/installation_instructions/platform/olcf-summit.rst +++ b/doc/installation_instructions/platform/olcf-summit.rst @@ -6,10 +6,10 @@ Since SmartSim does not have a built PowerPC build, the build steps for an IBM system are slightly different than other systems. Luckily for us, a conda channel with all relevant packages is maintained as part -of the `OpenCE `_ initiative. 
Users can follow these -instructions to get a working SmartSim build with PyTorch and TensorFlow for GPU -on Summit. Note that SmartSim and SmartRedis will be downloaded to the working -directory from which these instructions are executed. +of the `OpenCE `_ +initiative. Users can follow these instructions to get a working SmartSim build +with PyTorch and TensorFlow for GPU on Summit. Note that SmartSim and SmartRedis +will be downloaded to the working directory from which these instructions are executed. Note that the available PyTorch version (1.10.2) does not match the one expected by RedisAI 1.2.7 (1.11): it is still compatible and should @@ -19,7 +19,7 @@ into problems. .. code-block:: bash # setup Python and build environment - export ENV_NAME=smartsim-0.7.0 + export ENV_NAME=smartsim-0.8.0 git clone https://github.com/CrayLabs/SmartRedis.git smartredis git clone https://github.com/CrayLabs/SmartSim.git smartsim conda config --prepend channels https://ftp.osuosl.org/pub/open-ce/1.6.1/ diff --git a/doc/installation_instructions/platform/perlmutter.rst b/doc/installation_instructions/platform/perlmutter.rst new file mode 100644 index 000000000..71f97a4dc --- /dev/null +++ b/doc/installation_instructions/platform/perlmutter.rst @@ -0,0 +1,64 @@ +NERSC Perlmutter +================ + +One-time Setup +-------------- + +To install SmartSim on Perlmutter, follow these steps: + +**Step 1:** Create and activate a conda environment for SmartSim: + +.. code:: bash + + module load conda cudatoolkit/12.2 cudnn/8.9.3_cuda12 PrgEnv-gnu + conda create -n smartsim python=3.11 + conda activate smartsim + +**Step 2:** Build the SmartRedis C++ and Fortran libraries: + +.. code:: bash + + git clone https://github.com/CrayLabs/SmartRedis.git + cd SmartRedis + make lib-with-fortran + pip install . + cd .. + +**Step 3:** Install SmartSim in the conda environment: + +.. 
code:: bash + + pip install git+https://github.com/CrayLabs/SmartSim.git + +**Step 4:** Build Redis, RedisAI, the backends, and all the Python packages: + +.. code:: bash + + smart build --device=cuda-12 + +**Step 5:** Check that SmartSim has been installed and built correctly: + +.. code:: bash + + smart validate --device gpu + +The following output indicates a successful install: + +.. code:: bash + + [SmartSim] INFO Verifying Tensor Transfer + [SmartSim] INFO Verifying Torch Backend + [SmartSim] INFO Verifying ONNX Backend + [SmartSim] INFO Verifying TensorFlow Backend + 16:26:35 login SmartSim[557020:MainThread] INFO Success! + +Post-installation +----------------- + +After completing the above steps to install SmartSim in a conda environment, you +can reload the conda environment by running the following commands: + +.. code:: bash + + module load conda cudatoolkit/12.2 cudnn/8.9.3_cuda12 PrgEnv-gnu + conda activate smartsim diff --git a/doc/installation_instructions/site-install.rst b/doc/installation_instructions/site-install.rst index 26ecd6c13..53e0ff8bf 100644 --- a/doc/installation_instructions/site-install.rst +++ b/doc/installation_instructions/site-install.rst @@ -11,5 +11,5 @@ from source with the following steps replacing ``COMPILER_VERSION`` and module use -a /lus/scratch/smartsim/local/modulefiles module load cudatoolkit/11.8 cudnn smartsim-deps/COMPILER_VERSION/SMARTSIM_VERSION - pip install smartsim[ml] - smart build --only_python_packages --device gpu [--onnx] + pip install smartsim + smart build --skip-backends --device gpu [--onnx] diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt deleted file mode 100644 index 696881bef..000000000 --- a/doc/requirements-doc.txt +++ /dev/null @@ -1,18 +0,0 @@ -Sphinx==6.2.1 -breathe==4.35.0 -sphinx-fortran==1.1.1 -sphinx-book-theme==1.0.1 -sphinx-copybutton==0.5.2 -sphinx-tabs==3.4.4 -nbsphinx==0.9.3 -docutils==0.18.1 -torch==2.0.1 -tensorflow==2.13.1 -ipython -jinja2==3.1.2 -protobuf -numpy 
-sphinx-design -pypandoc -sphinx-autodoc-typehints -myst_parser diff --git a/doc/tutorials/getting_started/getting_started.ipynb b/doc/tutorials/getting_started/getting_started.ipynb index 0a5230b0f..e2caf0070 100644 --- a/doc/tutorials/getting_started/getting_started.ipynb +++ b/doc/tutorials/getting_started/getting_started.ipynb @@ -24,7 +24,8 @@ "metadata": {}, "outputs": [], "source": [ - "from smartsim import Experiment" + "import os\n", + "from smartsim import Experiment\n" ] }, { @@ -38,6 +39,7 @@ " * `pbs`\n", " * `lsf`\n", " * `local` (single node/laptops)\n", + " * `dragon`\n", " * `auto`\n", "\n", "If `launcher=\"auto\"` is used, the experiment will attempt to find a launcher on the system, and use the first one it encounters. If a launcher cannot be found or no launcher parameter is provided, the default value of `launcher=\"local\"` will be used. \n", @@ -52,7 +54,7 @@ "outputs": [], "source": [ "# Init Experiment and specify to launch locally\n", - "exp = Experiment(name=\"getting-started\", launcher=\"local\")" + "exp = Experiment(name=\"getting-started\", launcher=\"local\")\n" ] }, { @@ -78,7 +80,7 @@ "settings = exp.create_run_settings(exe=\"echo\", exe_args=\"hello!\", run_command=None)\n", "\n", "# create the simple model instance so we can run it.\n", - "M1 = exp.create_model(name=\"tutorial-model\", run_settings=settings)" + "M1 = exp.create_model(name=\"tutorial-model\", run_settings=settings)\n" ] }, { @@ -101,7 +103,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:18:27 e3fbeabfdb3e SmartSim[1408] INFO \n", + "19:17:29 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: getting-started\n", @@ -112,37 +114,24 @@ "\n", "=== Models ===\n", "tutorial-model\n", - "Executable: /usr/bin/echo\n", + "Executable: /bin/echo\n", "Executable Arguments: hello!\n", "\n", "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": 
"stdout", - "output_type": "stream", - "text": [ - "00:18:39 e3fbeabfdb3e SmartSim[1408] INFO tutorial-model(1428): Completed\n" + "\n", + "19:17:32 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO tutorial-model(97213): SmartSimStatus.STATUS_COMPLETED\n" ] } ], "source": [ - "exp.start(M1, block=True, summary=True)" + "exp.start(M1, block=True, summary=True)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The model has completed. Let's look at the content of the current working directory. We can see that two files, `tutorial-model.out` and `tutorial-model.err` have been created." + "The model has completed. Let's look at the content of the current working directory. Two files, `tutorial-model.out` and `tutorial-model.err` have been created in the `Model` path. To make their inspection easier, we can define a helper function." ] }, { @@ -163,15 +152,21 @@ } ], "source": [ - "outputfile = './tutorial-model.out'\n", - "errorfile = './tutorial-model.err'\n", + "def get_files(model):\n", + " \"\"\"Get output and error file of a Model\"\"\"\n", + " outputfile = os.path.join(model.path, model.name+\".out\")\n", + " errorfile = os.path.join(model.path, model.name+\".err\")\n", + "\n", + " return outputfile, errorfile\n", + "\n", + "outputfile, errorfile = get_files(M1)\n", "\n", "print(\"Content of tutorial-model.out:\")\n", "with open(outputfile, 'r') as fin:\n", " print(fin.read())\n", "print(\"Content of tutorial-model.err:\")\n", "with open(errorfile, 'r') as fin:\n", - " print(fin.read())" + " print(fin.read())\n" ] }, { @@ -192,9 +187,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:18:45 e3fbeabfdb3e SmartSim[1408] INFO tutorial-model-1(1431): Completed\n", - "00:18:48 e3fbeabfdb3e SmartSim[1408] INFO tutorial-model-2(1432): Running\n", - "00:18:49 e3fbeabfdb3e SmartSim[1408] INFO tutorial-model-2(1432): Completed\n" + "19:17:37 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO tutorial-model-1(97239): 
SmartSimStatus.STATUS_COMPLETED\n", + "19:17:40 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO tutorial-model-2(97250): SmartSimStatus.STATUS_RUNNING\n", + "19:17:41 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO tutorial-model-2(97250): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -203,7 +198,7 @@ "run_settings_2 = exp.create_run_settings(exe=\"sleep\", exe_args=\"5\", run_command=None)\n", "model_1 = exp.create_model(\"tutorial-model-1\", run_settings_1)\n", "model_2 = exp.create_model(\"tutorial-model-2\", run_settings_2)\n", - "exp.start(model_1, model_2)" + "exp.start(model_1, model_2)\n" ] }, { @@ -224,7 +219,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:18:53 e3fbeabfdb3e SmartSim[1408] INFO \n", + "19:17:45 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: getting-started\n", @@ -235,28 +230,15 @@ "\n", "=== Models ===\n", "tutorial-model-mpirun\n", - "Executable: /usr/bin/echo\n", + "Executable: /bin/echo\n", "Executable Arguments: hello world!\n", - "Run Command: mpirun\n", + "Run Command: /usr/local/bin/mpirun\n", "Run Arguments:\n", "\tn = 2\n", "\n", "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00:19:05 e3fbeabfdb3e SmartSim[1408] INFO tutorial-model-mpirun(1435): Completed\n" + "\n", + "19:17:47 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO tutorial-model-mpirun(97310): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -269,7 +251,7 @@ "\n", "# create and start the MPI model\n", "ompi_model = exp.create_model(\"tutorial-model-mpirun\", openmpi_settings)\n", - "exp.start(ompi_model, summary=True)" + "exp.start(ompi_model, summary=True)\n" ] }, { @@ -296,7 +278,7 @@ } ], "source": [ - "outputfile = './tutorial-model-mpirun.out'\n", + "outputfile, _ = get_files(ompi_model)\n", "\n", "print(\"Content of tutorial-model-mpirun.out:\")\n", "with open(outputfile, 'r') 
as fin:\n", @@ -320,7 +302,7 @@ "source": [ "# define how we want each ensemble member to execute\n", "# in this case we create settings to execute \"sleep 3\"\n", - "ens_settings = exp.create_run_settings(exe=\"sleep\", exe_args=\"3\")" + "ens_settings = exp.create_run_settings(exe=\"sleep\", exe_args=\"3\")\n" ] }, { @@ -339,41 +321,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:19:08 e3fbeabfdb3e SmartSim[1408] INFO \n", + "19:17:50 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: getting-started\n", "Experiment Path: /home/craylabs/tutorials/getting_started/getting-started\n", "Launcher: local\n", - "Ensembles: 1\n", "Database Status: inactive\n", "\n", "=== Ensembles ===\n", "ensemble-replica\n", "Members: 4\n", - "Batch Launch: False\n", + "Batch Launch: None\n", "\n", "\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00:19:24 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_0(1443): Completed\n", - "00:19:24 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_2(1445): Completed\n", - "00:19:24 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_1(1444): Completed\n", - "00:19:25 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_3(1446): Completed\n", - "00:19:26 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_1(1444): Completed\n", - "00:19:26 e3fbeabfdb3e SmartSim[1408] INFO ensemble-replica_3(1446): Completed\n" + "\n", + "19:17:55 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble-replica_0(97347): SmartSimStatus.STATUS_COMPLETED\n", + "19:17:56 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO ensemble-replica_1(97348): SmartSimStatus.STATUS_COMPLETED\n", + "19:17:56 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO ensemble-replica_2(97349): SmartSimStatus.STATUS_COMPLETED\n", + "19:17:56 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO ensemble-replica_3(97350): 
SmartSimStatus.STATUS_COMPLETED\n", + "19:17:57 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble-replica_1(97348): SmartSimStatus.STATUS_COMPLETED\n", + "19:17:57 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble-replica_2(97349): SmartSimStatus.STATUS_COMPLETED\n", + "19:17:57 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble-replica_3(97350): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -382,7 +351,7 @@ " replicas=4,\n", " run_settings=ens_settings)\n", "\n", - "exp.start(ensemble, summary=True)" + "exp.start(ensemble, summary=True)\n" ] }, { @@ -420,7 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "rs = exp.create_run_settings(exe=\"python\", exe_args=\"output_my_parameter.py\")" + "rs = exp.create_run_settings(exe=\"python\", exe_args=\"output_my_parameter.py\")\n" ] }, { @@ -446,12 +415,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:19:30 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n", - "00:19:34 e3fbeabfdb3e SmartSim[1408] INFO ensemble_0(1449): Completed\n", - "00:19:34 e3fbeabfdb3e SmartSim[1408] INFO ensemble_1(1450): Completed\n", - "00:19:34 e3fbeabfdb3e SmartSim[1408] INFO ensemble_2(1451): Completed\n", - "00:19:35 e3fbeabfdb3e SmartSim[1408] INFO ensemble_3(1452): Completed\n", - "00:19:36 e3fbeabfdb3e SmartSim[1408] INFO ensemble_3(1452): Completed\n" + "19:18:06 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_0(97408): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:06 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_1(97409): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:06 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_3(97421): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:07 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO ensemble_2(97410): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:08 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_2(97410): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -467,7 +435,7 @@ 
"ensemble.attach_generator_files(to_configure=config_file)\n", "\n", "exp.generate(ensemble, overwrite=True)\n", - "exp.start(ensemble)" + "exp.start(ensemble)\n" ] }, { @@ -486,16 +454,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Content of getting-started/ensemble/ensemble_0/ensemble_0.out:\n", + "Content of /home/craylabs/tutorials/getting_started/getting-started/ensemble/ensemble_0/ensemble_0.out:\n", "Hello, my name is Ellie and my parameter is 2\n", "\n", - "Content of getting-started/ensemble/ensemble_1/ensemble_1.out:\n", + "Content of /home/craylabs/tutorials/getting_started/getting-started/ensemble/ensemble_1/ensemble_1.out:\n", "Hello, my name is Ellie and my parameter is 11\n", "\n", - "Content of getting-started/ensemble/ensemble_2/ensemble_2.out:\n", + "Content of /home/craylabs/tutorials/getting_started/getting-started/ensemble/ensemble_2/ensemble_2.out:\n", "Hello, my name is John and my parameter is 2\n", "\n", - "Content of getting-started/ensemble/ensemble_3/ensemble_3.out:\n", + "Content of /home/craylabs/tutorials/getting_started/getting-started/ensemble/ensemble_3/ensemble_3.out:\n", "Hello, my name is John and my parameter is 11\n", "\n" ] @@ -503,7 +471,7 @@ ], "source": [ "for id in range(4):\n", - " outputfile = f\"getting-started/ensemble/ensemble_{id}/ensemble_{id}.out\"\n", + " outputfile, _ = get_files(ensemble.entities[id])\n", "\n", " print(f\"Content of {outputfile}:\")\n", " with open(outputfile, 'r') as fin:\n", @@ -526,9 +494,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:19:40 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n", - "00:19:45 e3fbeabfdb3e SmartSim[1408] INFO ensemble_0(1455): Completed\n", - "00:19:45 e3fbeabfdb3e SmartSim[1408] INFO ensemble_1(1456): Completed\n" + "19:18:17 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO param_ensemble_0(97484): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:17 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO 
param_ensemble_1(97495): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -537,12 +504,12 @@ " \"tutorial_name\": [\"Ellie\", \"John\"],\n", " \"tutorial_parameter\": [2, 11]\n", "}\n", - "ensemble = exp.create_ensemble(\"ensemble\", params=params, run_settings=rs, perm_strategy=\"random\", n_models=2)\n", + "ensemble = exp.create_ensemble(\"param_ensemble\", params=params, run_settings=rs, perm_strategy=\"random\", n_models=2)\n", "config_file = \"./output_my_parameter.py\"\n", "ensemble.attach_generator_files(to_configure=config_file)\n", "\n", "exp.generate(ensemble, overwrite=True)\n", - "exp.start(ensemble)" + "exp.start(ensemble)\n" ] }, { @@ -574,12 +541,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:19:46 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n", - "00:19:51 e3fbeabfdb3e SmartSim[1408] INFO ensemble_new_tag_0(1459): Completed\n", - "00:19:51 e3fbeabfdb3e SmartSim[1408] INFO ensemble_new_tag_1(1460): Completed\n", - "00:19:51 e3fbeabfdb3e SmartSim[1408] INFO ensemble_new_tag_2(1461): Completed\n", - "00:19:52 e3fbeabfdb3e SmartSim[1408] INFO ensemble_new_tag_3(1462): Completed\n", - "00:19:53 e3fbeabfdb3e SmartSim[1408] INFO ensemble_new_tag_3(1462): Completed\n" + "19:18:23 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_new_tag_0(97520): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:23 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_new_tag_1(97521): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:23 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_new_tag_3(97523): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:24 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO ensemble_new_tag_2(97522): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:25 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO ensemble_new_tag_2(97522): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -598,7 +564,7 @@ "ensemble.attach_generator_files(to_configure=config_file)\n", "\n", "exp.generate(ensemble, overwrite=True, 
tag='@')\n", - "exp.start(ensemble)" + "exp.start(ensemble)\n" ] }, { @@ -617,31 +583,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "| | Name | Entity-Type | JobID | RunID | Time | Status | Returncode |\n", - "|----|-----------------------|---------------|---------|---------|---------|-----------|--------------|\n", - "| 0 | tutorial-model | Model | 1428 | 0 | 2.00734 | Completed | 0 |\n", - "| 1 | tutorial-model-1 | Model | 1431 | 0 | 2.22411 | Completed | 0 |\n", - "| 2 | tutorial-model-2 | Model | 1432 | 0 | 5.98942 | Completed | 0 |\n", - "| 3 | tutorial-model-mpirun | Model | 1435 | 0 | 2.00939 | Completed | 0 |\n", - "| 4 | ensemble-replica_0 | Model | 1443 | 0 | 4.64557 | Completed | 0 |\n", - "| 5 | ensemble-replica_2 | Model | 1445 | 0 | 4.2261 | Completed | 0 |\n", - "| 6 | ensemble-replica_1 | Model | 1444 | 0 | 6.44562 | Completed | 0 |\n", - "| 7 | ensemble-replica_3 | Model | 1446 | 0 | 6.02451 | Completed | 0 |\n", - "| 8 | ensemble_2 | Model | 1451 | 0 | 4.22712 | Completed | 0 |\n", - "| 9 | ensemble_3 | Model | 1452 | 0 | 6.02064 | Completed | 0 |\n", - "| 10 | ensemble_0 | Model | 1449 | 0 | 4.64088 | Completed | 0 |\n", - "| 11 | ensemble_0 | Model | 1455 | 1 | 4.21892 | Completed | 0 |\n", - "| 12 | ensemble_1 | Model | 1450 | 0 | 4.43377 | Completed | 0 |\n", - "| 13 | ensemble_1 | Model | 1456 | 1 | 4.00995 | Completed | 0 |\n", - "| 14 | ensemble_new_tag_0 | Model | 1459 | 0 | 4.60659 | Completed | 0 |\n", - "| 15 | ensemble_new_tag_1 | Model | 1460 | 0 | 4.39902 | Completed | 0 |\n", - "| 16 | ensemble_new_tag_2 | Model | 1461 | 0 | 4.19067 | Completed | 0 |\n", - "| 17 | ensemble_new_tag_3 | Model | 1462 | 0 | 5.9866 | Completed | 0 |\n" + "| | Name | Entity-Type | JobID | RunID | Time | Status | Returncode |\n", + "|----|-----------------------|---------------|---------|---------|--------|---------------------------------|--------------|\n", + "| 0 | tutorial-model | Model | 97213 | 0 | 2.0073 | 
SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 1 | tutorial-model-1 | Model | 97239 | 0 | 2.2181 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 2 | tutorial-model-2 | Model | 97250 | 0 | 6.0111 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 3 | tutorial-model-mpirun | Model | 97310 | 0 | 2.0072 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 4 | ensemble-replica_0 | Model | 97347 | 0 | 4.6530 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 5 | ensemble-replica_1 | Model | 97348 | 0 | 6.4457 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 6 | ensemble-replica_2 | Model | 97349 | 0 | 6.2330 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 7 | ensemble-replica_3 | Model | 97350 | 0 | 6.0211 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 8 | ensemble_0 | Model | 97408 | 0 | 4.6442 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 9 | ensemble_1 | Model | 97409 | 0 | 4.4313 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 10 | ensemble_3 | Model | 97421 | 0 | 4.0064 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 11 | ensemble_2 | Model | 97410 | 0 | 6.2264 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 12 | param_ensemble_0 | Model | 97484 | 0 | 4.2159 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 13 | param_ensemble_1 | Model | 97495 | 0 | 4.0068 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 14 | ensemble_new_tag_0 | Model | 97520 | 0 | 4.6525 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 15 | ensemble_new_tag_1 | Model | 97521 | 0 | 4.4403 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 16 | ensemble_new_tag_3 | Model | 97523 | 0 | 4.0074 | SmartSimStatus.STATUS_COMPLETED | 0 |\n", + "| 17 | ensemble_new_tag_2 | Model | 97522 | 0 | 6.2288 | SmartSimStatus.STATUS_COMPLETED | 0 |\n" ] } ], "source": [ - "print(exp.summary())" + "print(exp.summary())\n" ] }, { @@ -655,7 +621,7 @@ "of an experiment and across multiple workloads. 
In order to stream data into or receive data from the Orchestrator,\n", "one of the SmartSim clients (SmartRedis) has to be used within your workload. \n", "\n", - "\"orchestrator-overview\"\n", + "
\"orchestrator-overview\"
\n", "\n", "The Orchestrator is capable of hosting and executing AI models written in Python on CPU or GPU.\n", "The Orchestrator supports models written with TensorFlow, Pytorch, or models saved in an ONNX format (e.g. scikit-learn).\n", @@ -664,7 +630,7 @@ "\n", "Orchestrators can either be deployed on a single host, or many hosts as shown in the diagram below. \n", "\n", - "\"orchestrator-cluster\"\n", + "
\"orchestrator-cluster\"
\n", "\n", "In this tutorial, a single-host host Orchestrator is deployed locally (as we specified `local` for the Experiment launcher)\n", "and used to demonstrate how to use the SmartRedis Python client within a workload." @@ -679,22 +645,14 @@ "from smartredis import Client\n", "import numpy as np\n", "\n", - "REDIS_PORT=6899" + "REDIS_PORT=6899\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "00:19:57 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n" - ] - } - ], + "outputs": [], "source": [ "# start a new Experiment for this section\n", "exp = Experiment(\"tutorial-smartredis\", launcher=\"local\")\n", @@ -707,7 +665,7 @@ "exp.generate(db)\n", "\n", "# start the database\n", - "exp.start(db)" + "exp.start(db)\n" ] }, { @@ -721,12 +679,21 @@ "cell_type": "code", "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SmartRedis Library@19-18-39:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n", + "SmartRedis Library@19-18-39:WARNING: Environment variable SR_LOG_LEVEL is not set. 
Defaulting to INFO\n" + ] + } + ], "source": [ "# connect a SmartRedis client at the address supplied by the launched\n", "# Orchestrator instance.\n", "# Cluster=False as the Orchestrator was deployed on a single compute host (local)\n", - "client = Client(address=db.get_address()[0], cluster=False)" + "client = Client(address=db.get_address()[0], cluster=False)\n" ] }, { @@ -772,7 +739,7 @@ "\n", "receive_tensor = client.get_tensor(\"tutorial_tensor_1\")\n", "\n", - "print('Receive tensor:\\n\\n', receive_tensor)" + "print('Receive tensor:\\n\\n', receive_tensor)\n" ] }, { @@ -808,7 +775,7 @@ "module = torch.jit.trace(net, example_forward_input)\n", "\n", "# Save the traced model to a file\n", - "torch.jit.save(module, \"./torch_cnn.pt\")" + "torch.jit.save(module, \"./torch_cnn.pt\")\n" ] }, { @@ -822,10 +789,18 @@ "cell_type": "code", "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Default@19-18-41:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" + ] + } + ], "source": [ "# Set the model in the Redis database from the file\n", - "client.set_model_from_file(\"tutorial-cnn\", \"./torch_cnn.pt\", \"TORCH\", \"CPU\")" + "client.set_model_from_file(\"tutorial-cnn\", \"./torch_cnn.pt\", \"TORCH\", \"CPU\")\n" ] }, { @@ -840,7 +815,7 @@ "\n", "# Run model and retrieve the output\n", "client.run_model(\"tutorial-cnn\", inputs=[\"torch_cnn_input\"], outputs=[\"torch_cnn_output\"])\n", - "out_data = client.get_tensor(\"torch_cnn_output\")" + "out_data = client.get_tensor(\"torch_cnn_output\")\n" ] }, { @@ -877,7 +852,7 @@ "sample_array_1 = np.array([np.arange(9.)])\n", "print(sample_array_1)\n", "print(\"Max:\")\n", - "print(max_of_tensor(sample_array_1))" + "print(max_of_tensor(sample_array_1))\n" ] }, { @@ -893,7 +868,7 @@ "metadata": {}, "outputs": [], "source": [ - "client.set_function(\"max-of-tensor\", max_of_tensor)" + 
"client.set_function(\"max-of-tensor\", max_of_tensor)\n" ] }, { @@ -927,7 +902,7 @@ "\n", "out = client.get_tensor(\"script-output\")\n", "\n", - "print(out)" + "print(out)\n" ] }, { @@ -939,11 +914,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "exp.stop(db)" + "exp.stop(db)\n" ] }, { @@ -963,7 +938,7 @@ "metadata": {}, "outputs": [], "source": [ - "exp.start(db)" + "exp.start(db)\n" ] }, { @@ -982,7 +957,7 @@ "rs_prod = exp.create_run_settings(\"python\", f\"producer.py --redis-port {REDIS_PORT}\")\n", "ensemble = exp.create_ensemble(name=\"producer\",\n", " replicas=2,\n", - " run_settings=rs_prod)" + " run_settings=rs_prod)\n" ] }, { @@ -999,7 +974,7 @@ "outputs": [], "source": [ "rs_consumer = exp.create_run_settings(\"python\", f\"consumer.py --redis-port {REDIS_PORT}\")\n", - "consumer = exp.create_model(\"consumer\", run_settings=rs_consumer)" + "consumer = exp.create_model(\"consumer\", run_settings=rs_consumer)\n" ] }, { @@ -1016,7 +991,7 @@ "outputs": [], "source": [ "consumer.register_incoming_entity(ensemble.models[0])\n", - "consumer.register_incoming_entity(ensemble.models[1])" + "consumer.register_incoming_entity(ensemble.models[1])\n" ] }, { @@ -1035,46 +1010,36 @@ "name": "stdout", "output_type": "stream", "text": [ - "00:20:48 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n", - "00:20:48 e3fbeabfdb3e SmartSim[1408] INFO Working in previously created experiment\n", - "00:20:48 e3fbeabfdb3e SmartSim[1408] INFO \n", + "19:18:53 HPE-C02YR4ANLVCJ SmartSim[97173:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: tutorial-smartredis\n", "Experiment Path: /home/craylabs/tutorials/getting_started/tutorial-smartredis\n", "Launcher: local\n", - "Ensembles: 1\n", "Models: 1\n", "Database Status: active\n", "\n", "=== Ensembles ===\n", "producer\n", "Members: 2\n", - "Batch Launch: False\n", + "Batch Launch: None\n", "\n", "=== 
Models ===\n", "consumer\n", - "Executable: /usr/bin/python\n", + "Executable: /usr/local/anaconda3/envs/ss-py3.10/bin/python\n", "Executable Arguments: consumer.py --redis-port 6899\n", "\n", "\n", "\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "00:21:02 e3fbeabfdb3e SmartSim[1408] INFO producer_0(1500): Completed\n", - "00:21:02 e3fbeabfdb3e SmartSim[1408] INFO producer_1(1505): Completed\n", - "00:21:02 e3fbeabfdb3e SmartSim[1408] INFO consumer(1510): Completed\n" + "19:18:58 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO producer_0(97711): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:58 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO producer_1(97712): SmartSimStatus.STATUS_COMPLETED\n", + "19:18:58 HPE-C02YR4ANLVCJ SmartSim[97173:JobManager] INFO consumer(97713): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -1085,7 +1050,7 @@ "exp.generate(consumer, overwrite=True)\n", "\n", "# start the models\n", - "exp.start(ensemble, consumer, summary=True)" + "exp.start(ensemble, consumer, summary=True)\n" ] }, { @@ -1104,21 +1069,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tensor for producer_0 is: [[[[0.16503988 0.12075829 0.3565984 ]\n", - " [0.72577718 0.09396099 0.1618377 ]\n", - " [0.33099621 0.55506376 0.69916534]]]]\n", - "Tensor for producer_1 is: [[[[0.68450198 0.27678731 0.65711464]\n", - " [0.74589422 0.45886442 0.52484735]\n", - " [0.5394516 0.20950066 0.96127311]]]]\n", + "SmartRedis Library@19-18-54:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n", + "SmartRedis Library@19-18-54:WARNING: Environment variable SR_LOG_LEVEL is not set. 
Defaulting to INFO\n", + "Tensor for producer_0 is: [[[[0.40963388 0.66147363 0.88239209]\n", + " [0.67788696 0.66730329 0.26504813]\n", + " [0.80848382 0.96430444 0.75951969]]]]\n", + "Tensor for producer_1 is: [[[[0.67515573 0.28582205 0.79349604]\n", + " [0.78848592 0.67902375 0.54826283]\n", + " [0.01769311 0.55995054 0.47818324]]]]\n", "\n" ] } ], "source": [ - "outputfile = './tutorial-smartredis/consumer/consumer.out'\n", + "outputfile, _ = get_files(consumer)\n", "\n", "with open(outputfile, 'r') as fin:\n", - " print(fin.read())" + " print(fin.read())\n" ] }, { @@ -1130,11 +1097,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "exp.stop(db)" + "exp.stop(db)\n" ] } ], @@ -1154,7 +1121,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb b/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb index 711ae999c..2b5f0a3a5 100644 --- a/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb +++ b/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb @@ -38,14 +38,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'torch'}\n" + "{'tensorflow', 'torch'}\n" ] } ], "source": [ "## Installing the ML backends\n", "from smartsim._core.utils.helpers import installed_redisai_backends\n", - "print(installed_redisai_backends())" + "print(installed_redisai_backends())\n" ] }, { @@ -68,16 +68,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "usage: smart build [-h] [-v] [--device {cpu,gpu}] [--only_python_packages]\n", - " [--no_pt] [--no_tf] [--onnx] [--torch_dir TORCH_DIR]\n", + "usage: smart build [-h] [-v] [--device {cpu,gpu}] [--dragon]\n", + " [--only_python_packages] [--no_pt] [--no_tf] [--onnx]\n", + " [--torch_dir TORCH_DIR]\n", " [--libtensorflow_dir LIBTENSORFLOW_DIR] [--keydb]\n", + " 
[--no_torch_with_mkl]\n", "\n", - "Build SmartSim dependencies (Redis, RedisAI, ML runtimes)\n", + "Build SmartSim dependencies (Redis, RedisAI, Dragon, ML runtimes)\n", "\n", "options:\n", " -h, --help show this help message and exit\n", " -v Enable verbose build process\n", " --device {cpu,gpu} Device to build ML runtimes for\n", + " --dragon Install the dragon runtime\n", " --only_python_packages\n", " Only evaluate the python packages (i.e. skip building\n", " backends)\n", @@ -90,12 +93,13 @@ " --libtensorflow_dir LIBTENSORFLOW_DIR\n", " Path to custom libtensorflow directory (ONLY USE IF\n", " NEEDED)\n", - " --keydb Build KeyDB instead of Redis\n" + " --keydb Build KeyDB instead of Redis\n", + " --no_torch_with_mkl Do not build Torch with Intel MKL\n" ] } ], "source": [ - "!smart build --help" + "!smart build --help\n" ] }, { @@ -124,12 +128,11 @@ "\u001b[34m[SmartSim]\u001b[0m \u001b[1;30mINFO\u001b[0m Successfully removed ML runtimes\n", "\u001b[34m[SmartSim]\u001b[0m \u001b[1;30mINFO\u001b[0m Running SmartSim build process...\n", "\u001b[34m[SmartSim]\u001b[0m \u001b[1;30mINFO\u001b[0m Checking requested versions...\n", - "\u001b[34m[SmartSim]\u001b[0m \u001b[1;30mINFO\u001b[0m Checking for build tools...\n", "\u001b[34m[SmartSim]\u001b[0m \u001b[1;30mINFO\u001b[0m Redis build complete!\n", "\n", "ML Backends Requested\n", "╒════════════╤════════╤══════╕\n", - "│ PyTorch │ 2.0.1 │ \u001b[32mTrue\u001b[0m │\n", + "│ PyTorch │ 2.1.0 │ \u001b[32mTrue\u001b[0m │\n", "│ TensorFlow │ 2.13.1 │ \u001b[32mTrue\u001b[0m │\n", "│ ONNX │ 1.14.1 │ \u001b[32mTrue\u001b[0m │\n", "╘════════════╧════════╧══════╛\n", @@ -144,7 +147,7 @@ } ], "source": [ - "!smart clean && smart build --device cpu --onnx" + "!smart clean && smart build --device cpu --onnx\n" ] }, { @@ -198,7 +201,7 @@ "\n", "# import smartsim and smartredis\n", "from smartredis import Client\n", - "from smartsim import Experiment" + "from smartsim import Experiment\n" ] }, { @@ -210,7 +213,7 @@ }, 
"outputs": [], "source": [ - "exp = Experiment(\"Inference-Tutorial\", launcher=\"local\")" + "exp = Experiment(\"Inference-Tutorial\", launcher=\"local\")\n" ] }, { @@ -223,7 +226,7 @@ "outputs": [], "source": [ "db = exp.create_database(port=6780, interface=\"lo\")\n", - "exp.start(db)" + "exp.start(db)\n" ] }, { @@ -321,7 +324,7 @@ " torch.jit.save(module, model_buffer)\n", " return model_buffer.getvalue()\n", "\n", - "traced_cnn = create_torch_model(n, example_forward_input)" + "traced_cnn = create_torch_model(n, example_forward_input)\n" ] }, { @@ -351,46 +354,46 @@ "name": "stdout", "output_type": "stream", "text": [ - "Prediction: [[-2.1860428 -2.3318565 -2.2773128 -2.2742267 -2.2679536 -2.304159\n", - " -2.423439 -2.3406057 -2.2474668 -2.3950338]\n", - " [-2.1803837 -2.3286302 -2.2805855 -2.2874444 -2.261593 -2.3145547\n", - " -2.4357762 -2.3169715 -2.2618299 -2.3798223]\n", - " [-2.1833746 -2.3249795 -2.28497 -2.2851245 -2.2555952 -2.308204\n", - " -2.4274755 -2.3441646 -2.2553194 -2.3779805]\n", - " [-2.1843016 -2.3395848 -2.2619352 -2.294549 -2.2571433 -2.312943\n", - " -2.4161577 -2.338785 -2.2538524 -2.3881512]\n", - " [-2.1936755 -2.3315516 -2.2739122 -2.2832148 -2.2666094 -2.3038912\n", - " -2.4211216 -2.3300066 -2.2564852 -2.3846986]\n", - " [-2.1709712 -2.3271346 -2.280365 -2.286064 -2.2617233 -2.3227994\n", - " -2.4253702 -2.3313646 -2.2593162 -2.383301 ]\n", - " [-2.1948013 -2.3318067 -2.2713811 -2.2844 -2.2526758 -2.3178148\n", - " -2.4255004 -2.3233378 -2.2388031 -2.4088087]\n", - " [-2.17515 -2.3240736 -2.2818787 -2.2857373 -2.259629 -2.3184\n", - " -2.425821 -2.3519678 -2.2413275 -2.385761 ]\n", - " [-2.187554 -2.3335872 -2.2767708 -2.2818003 -2.2654893 -2.3097534\n", - " -2.4182632 -2.3376188 -2.2509694 -2.384327 ]\n", - " [-2.1793714 -2.340681 -2.271785 -2.287751 -2.2620957 -2.3163543\n", - " -2.4111845 -2.3468175 -2.2472064 -2.3842056]\n", - " [-2.1906679 -2.3483853 -2.2580595 -2.2923894 -2.25718 -2.2951608\n", - " -2.431815 -2.3487022 
-2.2326546 -2.3963163]\n", - " [-2.1882055 -2.3293467 -2.2767649 -2.279892 -2.2527165 -2.3220086\n", - " -2.4226239 -2.3364902 -2.2455037 -2.394776 ]\n", - " [-2.1756573 -2.3318045 -2.2690601 -2.2737868 -2.264148 -2.3212118\n", - " -2.4243867 -2.3421402 -2.2562728 -2.390894 ]\n", - " [-2.1824148 -2.3317673 -2.2749603 -2.291667 -2.2524009 -2.3026595\n", - " -2.42986 -2.3290846 -2.265264 -2.387787 ]\n", - " [-2.1871543 -2.3408008 -2.2773213 -2.283908 -2.249834 -2.3159058\n", - " -2.4251873 -2.339211 -2.245001 -2.3839695]\n", - " [-2.1855574 -2.3216138 -2.2722392 -2.2826352 -2.2573392 -2.308948\n", - " -2.4348576 -2.3421624 -2.2397952 -2.4060655]\n", - " [-2.1876159 -2.330091 -2.2779942 -2.2849102 -2.2582757 -2.3122754\n", - " -2.4250498 -2.333003 -2.250753 -2.3871331]\n", - " [-2.182653 -2.3381891 -2.2795184 -2.287199 -2.2628696 -2.303869\n", - " -2.413879 -2.3404965 -2.26254 -2.3739154]\n", - " [-2.1733668 -2.3377435 -2.2724369 -2.28559 -2.2537165 -2.3127556\n", - " -2.4249415 -2.3484716 -2.2515364 -2.3897333]\n", - " [-2.1839535 -2.336417 -2.2839231 -2.285238 -2.2608624 -2.3198016\n", - " -2.424396 -2.3165755 -2.2433887 -2.3935702]]\n" + "Prediction: [[-2.2239347 -2.256488 -2.3910825 -2.2572591 -2.2663934 -2.3775585\n", + " -2.257742 -2.3160243 -2.391289 -2.3055189]\n", + " [-2.2149696 -2.2576108 -2.3899908 -2.2715292 -2.2628417 -2.3693023\n", + " -2.260772 -2.3166935 -2.3967428 -2.3028378]\n", + " [-2.2214003 -2.2581112 -2.3854284 -2.2616909 -2.2745335 -2.3779867\n", + " -2.2570336 -2.3125517 -2.391247 -2.302534 ]\n", + " [-2.214657 -2.2598932 -2.3800194 -2.2612374 -2.2718334 -2.3784144\n", + " -2.2596886 -2.318937 -2.3904119 -2.3075597]\n", + " [-2.2034936 -2.2570574 -2.4026587 -2.2698882 -2.2597382 -2.3796346\n", + " -2.2662714 -2.3141642 -2.3986044 -2.2949069]\n", + " [-2.2162325 -2.2635622 -2.3800213 -2.2569213 -2.264393 -2.3763664\n", + " -2.2658355 -2.3211577 -2.3904028 -2.307555 ]\n", + " [-2.2084794 -2.258525 -2.393487 -2.26341 -2.2674217 -2.3792422\n", + 
" -2.264515 -2.3262923 -2.3823283 -2.300095 ]\n", + " [-2.2175536 -2.2577217 -2.3975415 -2.2582505 -2.269493 -2.365971\n", + " -2.2619228 -2.3258338 -2.3984828 -2.291332 ]\n", + " [-2.2151139 -2.2522063 -2.3931108 -2.2577128 -2.270789 -2.371976\n", + " -2.2567465 -2.32229 -2.395818 -2.308673 ]\n", + " [-2.2141316 -2.2494154 -2.3948152 -2.2606037 -2.2732735 -2.3758345\n", + " -2.2620056 -2.3184063 -2.385798 -2.3094575]\n", + " [-2.221041 -2.2519057 -2.398841 -2.259931 -2.2686832 -2.3660865\n", + " -2.2632158 -2.322879 -2.3970191 -2.2942836]\n", + " [-2.2142313 -2.2578502 -2.393603 -2.2673647 -2.2553272 -2.37376\n", + " -2.2617526 -2.3199627 -2.399065 -2.301728 ]\n", + " [-2.2082942 -2.2571995 -2.3889875 -2.266007 -2.257706 -2.37675\n", + " -2.266374 -2.3223817 -2.3961644 -2.304737 ]\n", + " [-2.2229445 -2.2658186 -2.399095 -2.2566628 -2.266294 -2.3742397\n", + " -2.2578638 -2.3047974 -2.3973055 -2.2988966]\n", + " [-2.215887 -2.2676513 -2.3889093 -2.246127 -2.266115 -2.3842902\n", + " -2.2586591 -2.3106883 -2.396018 -2.3104343]\n", + " [-2.2099977 -2.2719226 -2.391469 -2.255561 -2.266949 -2.371345\n", + " -2.2596216 -2.324484 -2.3890057 -2.3031068]\n", + " [-2.214121 -2.2561312 -2.391877 -2.261881 -2.2639613 -2.3679278\n", + " -2.269122 -2.3139405 -2.4036062 -2.3015296]\n", + " [-2.22871 -2.256755 -2.3881361 -2.2651346 -2.2651856 -2.3733103\n", + " -2.2641761 -2.3182902 -2.3855858 -2.2960906]\n", + " [-2.2103846 -2.2450664 -2.3848588 -2.2795632 -2.2658024 -2.3679922\n", + " -2.2666745 -2.3190453 -2.3987417 -2.3054008]\n", + " [-2.2175698 -2.2573788 -2.391653 -2.2519581 -2.2637622 -2.3839104\n", + " -2.265371 -2.3158426 -2.3929882 -2.3040662]]\n" ] } ], @@ -407,7 +410,7 @@ "\n", "# get the output\n", "output = client.get_tensor(\"output\")\n", - "print(f\"Prediction: {output}\")" + "print(f\"Prediction: {output}\")\n" ] }, { @@ -451,7 +454,7 @@ "source": [ "def calc_svd(input_tensor):\n", " # svd function from TorchScript API\n", - " return input_tensor.svd()" + " 
return input_tensor.svd()\n" ] }, { @@ -466,46 +469,46 @@ "name": "stdout", "output_type": "stream", "text": [ - "U: [[[-0.31189808 0.86989427]\n", - " [-0.48122275 -0.49140105]\n", - " [-0.81923395 -0.0425336 ]]\n", + "U: [[[-0.50057614 0.2622205 ]\n", + " [-0.47629714 -0.8792326 ]\n", + " [-0.7228863 0.39773142]]\n", "\n", - " [[-0.5889101 -0.29554686]\n", - " [-0.43949458 -0.66398275]\n", - " [-0.6782547 0.68686163]]\n", + " [[-0.45728168 0.88121146]\n", + " [-0.37974676 -0.31532544]\n", + " [-0.80416775 -0.35218775]]\n", "\n", - " [[-0.61623317 0.05853765]\n", - " [-0.6667615 -0.5695148 ]\n", - " [-0.4191489 0.81989413]]\n", + " [[-0.4667158 0.8836199 ]\n", + " [-0.47055572 -0.21237665]\n", + " [-0.7488349 -0.4172673 ]]\n", "\n", - " [[-0.5424681 0.8400398 ]\n", - " [-0.31990844 -0.2152339 ]\n", - " [-0.77678 -0.49800384]]\n", + " [[-0.32159734 0.92966324]\n", + " [-0.6941528 -0.10238242]\n", + " [-0.64399314 -0.35389856]]\n", "\n", - " [[-0.43667376 0.8088193 ]\n", - " [-0.70812154 -0.57906115]\n", - " [-0.5548693 0.10246649]]]\n", + " [[-0.6984835 0.4685579 ]\n", + " [-0.55331963 0.12572214]\n", + " [-0.45382637 -0.8744412 ]]]\n", "\n", - ", S: [[137.10924 25.710997]\n", - " [131.49983 37.79937 ]\n", - " [178.72423 24.792084]\n", - " [125.13014 49.733784]\n", - " [137.48834 53.57199 ]]\n", + ", S: [[164.58028 49.682358 ]\n", + " [120.11677 66.62553 ]\n", + " [130.01929 17.520935 ]\n", + " [198.615 22.047113 ]\n", + " [154.67653 2.6773496]]\n", "\n", - ", V: [[[-0.8333395 0.5527615 ]\n", - " [-0.5527615 -0.8333395 ]]\n", + ", V: [[[-0.7275351 -0.68607044]\n", + " [-0.68607044 0.7275351 ]]\n", "\n", - " [[-0.5085228 -0.8610485 ]\n", - " [-0.8610485 0.5085228 ]]\n", + " [[-0.6071297 0.79460275]\n", + " [-0.79460275 -0.6071297 ]]\n", "\n", - " [[-0.8650402 0.5017025 ]\n", - " [-0.5017025 -0.8650402 ]]\n", + " [[-0.604189 0.7968411 ]\n", + " [-0.7968411 -0.604189 ]]\n", "\n", - " [[-0.56953645 0.8219661 ]\n", - " [-0.8219661 -0.56953645]]\n", + " [[-0.69911253 
-0.7150117 ]\n", + " [-0.7150117 0.69911253]]\n", "\n", - " [[-0.6115895 0.79117525]\n", - " [-0.79117525 -0.6115895 ]]]\n", + " [[-0.8665945 -0.499013 ]\n", + " [-0.499013 0.8665945 ]]]\n", "\n" ] } @@ -522,7 +525,7 @@ "U = client.get_tensor(\"U\")\n", "S = client.get_tensor(\"S\")\n", "V = client.get_tensor(\"V\")\n", - "print(f\"U: {U}\\n\\n, S: {S}\\n\\n, V: {V}\\n\")" + "print(f\"U: {U}\\n\\n, S: {S}\\n\\n, V: {V}\\n\")\n" ] }, { @@ -553,7 +556,7 @@ "# Compile model with optimizer\n", "model.compile(optimizer=\"adam\",\n", " loss=\"sparse_categorical_crossentropy\",\n", - " metrics=[\"accuracy\"])" + " metrics=[\"accuracy\"])\n" ] }, { @@ -592,8 +595,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[0.05032112 0.06484107 0.03512685 0.14747524 0.14440396 0.02395445\n", - " 0.03395916 0.06222691 0.26738793 0.1703033 ]]\n" + "[[0.06595241 0.11921222 0.02889561 0.20963618 0.08950416 0.11298887\n", + " 0.05179482 0.09778847 0.14826407 0.07596324]]\n" ] } ], @@ -604,7 +607,7 @@ "model_path, inputs, outputs = freeze_model(model, os.getcwd(), \"fcn.pb\")\n", "\n", "# use the same client we used for PyTorch to set the TensorFlow model\n", - "# this time the method for setting a model from a saved file is shown. 
\n", + "# this time the method for setting a model from a saved file is shown.\n", "# TensorFlow backed requires named inputs and outputs on graph\n", "# this differs from PyTorch and ONNX.\n", "client.set_model_from_file(\n", @@ -621,7 +624,7 @@ "\n", "# get the result of the inference\n", "pred = client.get_tensor(\"output\")\n", - "print(pred)" + "print(pred)\n" ] }, { @@ -689,7 +692,7 @@ "outputs": [], "source": [ "from skl2onnx import to_onnx\n", - "from sklearn.cluster import KMeans" + "from sklearn.cluster import KMeans\n" ] }, { @@ -704,7 +707,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[1 1 1 1 1 0 0 0 0 0]\n" + "Default@[0 0 0 0 0 1 1 1 1 1]\n" ] } ], @@ -726,7 +729,7 @@ "client.set_model(\"kmeans\", model, \"ONNX\", device=\"CPU\")\n", "client.run_model(\"kmeans\", inputs=\"input\", outputs=[\"labels\", \"transform\"])\n", "\n", - "print(client.get_tensor(\"labels\"))" + "print(client.get_tensor(\"labels\"))\n" ] }, { @@ -753,7 +756,7 @@ "source": [ "from sklearn.datasets import load_iris\n", "from sklearn.ensemble import RandomForestRegressor\n", - "from sklearn.model_selection import train_test_split" + "from sklearn.model_selection import train_test_split\n" ] }, { @@ -787,7 +790,7 @@ "client.put_tensor(\"input\", sample)\n", "client.set_model(\"rf_regressor\", model, \"ONNX\", device=\"CPU\")\n", "client.run_model(\"rf_regressor\", inputs=\"input\", outputs=\"output\")\n", - "print(client.get_tensor(\"output\"))" + "print(client.get_tensor(\"output\"))\n" ] }, { @@ -799,7 +802,7 @@ }, "outputs": [], "source": [ - "exp.stop(db)" + "exp.stop(db)\n" ] }, { @@ -815,15 +818,15 @@ "text/html": [ "\n", "\n", - "\n", + "\n", "\n", "\n", - "\n", + "\n", "\n", "
Name Entity-Type JobID RunID Time Status Returncode
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0DBNode 31857 0 32.7161Cancelled0
0 orchestrator_0DBNode 2809 0 70.9690SmartSimStatus.STATUS_CANCELLED0
" ], "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0DBNode 31857 0 32.7161Cancelled0
'" + "'\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0DBNode 2809 0 70.9690SmartSimStatus.STATUS_CANCELLED0
'" ] }, "execution_count": 19, @@ -832,7 +835,7 @@ } ], "source": [ - "exp.summary(style=\"html\")" + "exp.summary(style=\"html\")\n" ] }, { @@ -850,7 +853,7 @@ "host. This is particularly important for GPU-intensive workloads which require\n", "frequent communication with the database.\n", "\n", - "\"lattice\"\n" + "
\"lattice\"
\n" ] }, { @@ -874,7 +877,7 @@ " db_cpus=1,\n", " debug=False,\n", " ifname=\"lo\"\n", - ")" + ")\n" ] }, { @@ -889,29 +892,40 @@ "name": "stdout", "output_type": "stream", "text": [ - "21:18:06 C02G13RYMD6N SmartSim[30945] INFO \n", + "19:30:35 HPE-C02YR4ANLVCJ SmartSim[1187:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: Inference-Tutorial\n", - "Experiment Path: /Users/smartsim/smartsim/tutorials/ml_inference/Inference-Tutorial\n", + "Experiment Path: /home/craylabs/tutorials/ml_inference/Inference-Tutorial\n", "Launcher: local\n", "Models: 1\n", "Database Status: inactive\n", "\n", "=== Models ===\n", "colocated_model\n", - "Executable: /Users/smartsim/venv/bin/python\n", + "Executable: /usr/local/anaconda3/envs/ss-py3.10/bin/python\n", "Executable Arguments: ./colo-db-torch-example.py\n", "Co-located Database: True\n", "\n", "\n", - "\n", - "21:18:09 C02G13RYMD6N SmartSim[30945] INFO colocated_model(31865): Completed\n" + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19:30:38 HPE-C02YR4ANLVCJ SmartSim[1187:JobManager] WARNING colocated_model(3199): SmartSimStatus.STATUS_FAILED\n", + "19:30:38 HPE-C02YR4ANLVCJ SmartSim[1187:JobManager] WARNING colocated_model failed. See below for details \n", + "Job status at failure: SmartSimStatus.STATUS_FAILED \n", + "Launcher status at failure: Failed \n", + "Job returncode: 2 \n", + "Error and output file located at: /home/craylabs/tutorials/ml_inference/Inference-Tutorial/colocated_model\n" ] } ], "source": [ - "exp.start(colo_model, summary=True)" + "exp.start(colo_model, summary=True)\n" ] }, { @@ -927,16 +941,16 @@ "text/html": [ "\n", "\n", - "\n", + "\n", "\n", "\n", - "\n", - "\n", + "\n", + "\n", "\n", "
Name Entity-Type JobID RunID Time Status Returncode
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0 DBNode 31857 0 32.7161Cancelled0
1 colocated_modelModel 31865 0 3.5862 Completed0
0 orchestrator_0 DBNode 2809 0 70.9690SmartSimStatus.STATUS_CANCELLED0
1 colocated_modelModel 3199 0 3.1599 SmartSimStatus.STATUS_FAILED 2
" ], "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0 DBNode 31857 0 32.7161Cancelled0
1 colocated_modelModel 31865 0 3.5862 Completed0
'" + "'\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 orchestrator_0 DBNode 2809 0 70.9690SmartSimStatus.STATUS_CANCELLED0
1 colocated_modelModel 3199 0 3.1599 SmartSimStatus.STATUS_FAILED 2
'" ] }, "execution_count": 22, @@ -945,7 +959,7 @@ } ], "source": [ - "exp.summary(style=\"html\")" + "exp.summary(style=\"html\")\n" ] } ], diff --git a/doc/tutorials/ml_training/surrogate/fd_sim.py b/doc/tutorials/ml_training/surrogate/fd_sim.py index db68b24b2..7732f13d8 100644 --- a/doc/tutorials/ml_training/surrogate/fd_sim.py +++ b/doc/tutorials/ml_training/surrogate/fd_sim.py @@ -9,8 +9,8 @@ def augment_batch(samples, targets): """Augment samples and targets - - by exploiting rotational and axial symmetry. Each sample is + + by exploiting rotational and axial symmetry. Each sample is rotated and reflected to obtain 8 valid samples. The same transformations are applied to targets. @@ -76,7 +76,7 @@ def augment_batch(samples, targets): def simulate(steps, size): """Run multiple simulations and upload results - + both as tensors and as augmented samples for training. :param steps: Number of simulations to run @@ -85,13 +85,13 @@ def simulate(steps, size): batch_size = 50 samples = np.zeros((batch_size,size,size,1)).astype(np.single) targets = np.zeros_like(samples).astype(np.single) - client = Client(None, False) + client = Client(address=None, cluster=False) training_data_uploader = TrainingDataUploader(cluster=False, verbose=True) training_data_uploader.publish_info() for i in tqdm(range(steps)): - + u_init, u_steady = fd2d_heat_steady_test01(samples.shape[1], samples.shape[2]) u_init = u_init.astype(np.single) u_steady = u_steady.astype(np.single) diff --git a/doc/tutorials/ml_training/surrogate/tf_training.py b/doc/tutorials/ml_training/surrogate/tf_training.py index 932cb2df3..a7aaf3ebf 100644 --- a/doc/tutorials/ml_training/surrogate/tf_training.py +++ b/doc/tutorials/ml_training/surrogate/tf_training.py @@ -20,7 +20,7 @@ def create_dataset(idx, F): def store_model(model, idx): serialized_model, inputs, outputs = serialize_model(model) - client = Client(None, False) + client = Client(address=None, cluster=False) client.set_model(f"{model.name}_{idx}", 
serialized_model, "TF", "CPU", inputs=inputs, outputs=outputs) def train_model(model, epochs): @@ -43,7 +43,7 @@ def train_model(model, epochs): for epoch in range(epochs): print(f"Epoch {epoch+1}") - model.fit(training_generator, steps_per_epoch=None, + model.fit(training_generator, steps_per_epoch=None, epochs=epoch+1, initial_epoch=epoch, batch_size=training_generator.batch_size, verbose=2) if (epoch+1)%10 == 0: @@ -68,11 +68,11 @@ def upload_inference_examples(model, num_examples): if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Finite Difference Simulation") - parser.add_argument('--depth', type=int, default=4, + parser.add_argument('--depth', type=int, default=4, help="Half depth of residual network") - parser.add_argument('--epochs', type=int, default=100, + parser.add_argument('--epochs', type=int, default=100, help="Number of epochs to train the NN for") - parser.add_argument('--delay', type=int, default=0, + parser.add_argument('--delay', type=int, default=0, help="Seconds to wait before training") parser.add_argument('--size', type=int, default=100, help='Size of sample side, each sample will be a (size, size, 1) image') diff --git a/doc/tutorials/ml_training/surrogate/train_surrogate.ipynb b/doc/tutorials/ml_training/surrogate/train_surrogate.ipynb index c811d1205..5625b86b9 100644 --- a/doc/tutorials/ml_training/surrogate/train_surrogate.ipynb +++ b/doc/tutorials/ml_training/surrogate/train_surrogate.ipynb @@ -25,7 +25,7 @@ "\n", "The problem can be solved using a finite difference scheme. To this end, a modified version of the code\n", "written by John Burkardt will be used. 
Its original version is licensed under LGPL, and so is this example.\n", - "The code was downloaded from [this page](https://people.sc.fsu.edu/~jburkardt/py_src/fd2d_heat_steady/fd2d_heat_steady.html),\n", + "The code was downloaded from [this page](https://github.com/johannesgerer/jburkardt-m/tree/master/fd2d_heat_steady),\n", "which explains how the problem is discretized and solved.\n", "\n", "In the modified version of the code which will be used, a random number (between 1 and 5) of heat sources is placed.\n", @@ -35,80 +35,68 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 8, "id": "6a49acfb-2585-4423-9de9-3b26bd679a90", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAn8AAAFKCAYAAABsETl4AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAABKLklEQVR4nO3deZQV5Zk/8G8td+mmm0WECIq44RJFXEISRVk0CqKO66hxjoIRxSSCTn6JUZOIMxlHJCZjYpIxUUdijksmGg0MamIUxAVcohJFMQYBjRgBlUWa7ntv1fv7o0/34X3ep28Vl266tb6fczxJ1X1ruftL3e/ztGeMMSAiIiKiTPC7+wSIiIiIaMfh5I+IiIgoQzj5IyIiIsoQTv6IiIiIMoSTPyIiIqIM4eSPiIiIKEM4+SMiIiLKEE7+iIiIiDKEkz8iIiKiDOHk71PohRdewHHHHYcBAwbA8zwccsghXXKcyZMnw/M8rFy5crv2s8cee2CPPfbYpm3Gjh0Lz/O267izZ8+G53mYPXv2du2HqBYrV66E53mYPHnydu2Hr+PO4Xkexo4d292nQbRDcPLXDdo+9CdMmNDp+964cSNOPPFEPPfcczj77LMxY8YMXHLJJZ32RbOjXHvttfA8DwsWLOjuU2nXGRNOalXLhL8na/uH0Nb/NTY24vDDD8esWbPQ0tLS3afYrjs/CzrrH4yfVp35vuA/CqiasLtPgDrXc889hzVr1uC6667D1Vdf3b6+Kz5sr7/+elx55ZXYddddt2s/jz322DZvc+edd6KpqWm7jkvU2S688ELstttuMMZg9erVeOCBB/Dtb38bjz/+OB555BFr7K677orXX38dffr06aazJaKs4uTvU2b16tUAgMGDB3f5sQYNGoRBgwZt93723nvvbd5m99133+7jEnW2KVOm4Itf/GL78syZM3HwwQfjD3/4A+bPn49x48a135bL5bD//vt3x2kSUcbxZ99PgE2bNmHGjBk48MADUVdXh759+2L8+PF46qmnrHGe52HSpEkAgAsuuKD956fZs2djzz33BAD86le/sn6a2p6fVbWfcBYsWADP83Dttde2Zw8bGxvRp08fnHbaaeoVSPlTx9ixY/Fv//ZvAIBx48a1n6scI3+C3bBhA2644QaMGTMGgwcPRj6fx+DBg3H++edj+fLlNd9PoPWxfeKJJ9r/f9t/8qezv/zlLzjnnHMwaNAg5PN5DB06FNOmTcMHH3xgjdv6p7fXX38dJ510Evr27Yt+/frhy1/+MtatWwcAWLRoEY499lj07t0b/fr1w5QpU7B582ZrX1s/5k899RTGjh2LxsZG9O3bF2eccQb+9re/qfdpzZo1+Nd//Vfss88+KBQK2HnnnXHGGWfg1Vdfdca2PUfr16/HpZdeiiFDhiAMw/aflP785z/j0ksvxUEHHYQ+ffqgrq4Ow4cPx8yZM1Eul537vWrVKqxatcp6LK+99loA1X+u2vq+yudn7NixePfdd3H++edjl112ge/71ut74cKFOPnkk7HzzjujUChg2LBh+O53v9ulV5D79++PU089FUDrY7S1aj+//uUvf8HEiRPb3zsTJ07Eq6++mviz6R//+EcceeSRqK+vR//+/TFp0iTrtddVnwXvvfceLrvsMgwbNqz9M+qAAw7AJZdcgg0bNgBofQ396le/AgDsueee7ceVObsVK1ZgypQp2H333VEoFDBo0CBMnjwZq1atco77wAMP4Mtf/jL22Wcf1NfXo0+fPjj66KNx//33d3iut912Gw466CAUi0UMGTIEV1xxBZqbm51xRx11FMIwxHvvvafu5/zzz4fneVi0aFHi
4/Piiy/izDPPbL9PAwYMwMiRI3HdddcBSPe+KJVKuPnmmzF+/HgMGTIEhUIBAwcOxOmnn46XXnrJOt7kyZNxwQUXALC/C+RnZtrvFvr04ZW/Hu7DDz/E6NGjsXTpUowaNQqXXHIJNm7ciN///vcYN24cfvvb37Z/ucyYMQMvv/wyfv/73+OUU05pL/Q45JBDcNlll+HHP/4xRowY0T4egDWhavtgMMZs93k///zzmDVrFsaNG4epU6fipZdewoMPPohXXnkFr776KorFYofbtn0ZPvHEE5g0aVL7Ofbt27fqMV9//XVcc801GDduHE477TT06tULy5Ytw91334158+bhxRdfxNChQ2u6PzNmzMDs2bOxatUqzJgxo3391sU0c+bMwVlnnQXf93HKKadgyJAheO211/DTn/4Uf/jDH/Dss8+iX79+1n5XrFiBI488Ep/73OcwZcoUvPDCC7j33nvxzjvvYObMmTj++ONx3HHH4eKLL8aCBQtw++23I45j/M///I9zjosXL8b111+PCRMmYNq0aVi6dCkeeOABPPnkk1i8eDH22muv9rHLly/H2LFj8fe//x3HH388Tj31VKxZswb3338//vCHP+Cxxx7DF77wBWv/LS0tOOaYY/Dxxx/jn/7pnxCGIT7zmc8AAG699VbMnTsXo0ePxsSJE9HU1IQFCxbgqquuwvPPP9/+Zdy3b1/MmDEDN910EwDg8ssvb9//9obtP/jgAxxxxBHYaaedcM4556C5uRm9e/cGAPz3f/83vv71r6Nv3744+eSTMXDgQLzwwgu47rrrMH/+fMyfPx/5fH67jp8kDNN93C5ZsgRHH300Nm/ejNNPPx3Dhg3DCy+8gKOOOgojRozocLs5c+Zg3rx5OPnkk3HkkUdi4cKFuPPOO7F8+fL2L/Ou+CxoamrCqFGjsHLlShx//PE47bTTUCqVsGLFCvz617/GN7/5TfTp0weXX345Zs+ejSVLluCyyy5rfz9vfdxnn30W48ePx+bNm3HSSSdh2LBhWLlyJe666y48/PDDWLRokfU6vuqqq5DP53HUUUdh0KBBWLt2LebMmYMzzzwTP/nJTzBt2jTrXL///e/jmmuuwWc+8xlcdNFFyOVy+M1vfoPXX3/duV9Tp07F008/jTvuuMOK0ADA+vXrcd999+HAAw/EEUccUfXxefnll3HkkUciCAKccsopGDp0KNavX4/XXnsNv/zlL/Gd73wn1fviww8/xOWXX46jjz4aEydORL9+/fDWW29hzpw5ePjhh7Fw4UKMHDkSAHDqqadi/fr1znfB1rblu4U+hQztcCtWrDAAzPjx4xPHnnvuuQaAufXWW63177//vhkyZIgZMGCA2bJlS/v6O+64wwAwd9xxh3rMSZMmdXgsAGZbXhKTJk0yAMyKFSva182fP799P/fee681/rzzzjMAzD333GOtHzp0qBk6dKi1bsaMGQaAmT9/vnrsMWPGOOe6fv1688EHHzhjH3/8ceP7vpkyZYq1vqPHqiPaMdusW7fO9O7d2+y6665m5cqV1m333HOPAWAuvfTS9nVtzwcAc9NNN7Wvj+PYTJw40QAwffv2NQ8++GD7baVSyRx88MEmDEPzj3/8o3391o/5LbfcYh37lltuMQDMSSedZK0/8sgjTRAE5pFHHrHWv/HGG6axsdEMHz7cWj906ND212xTU5Nz/1etWmUqlYq1Lo5j85WvfMUAME899ZSzP/mct6n2vLTd1xkzZljr2+7/BRdc4JzH0qVLTRiGZsSIEWbdunXWbddff70BYG688Ub1XNJqey8sWrTIWr9u3TozePBgA8A899xz1m0dvSePOuooA8Dcdddd1vrvfe977fdz6/dc2+MVhqH1OFcqFTN27FjnvDr7s2DOnDkGgLn88sud2zZt2mSam5vbl7XPjDalUsnsscceprGx0bz44ovWbU8++aQJgsB5HS9fvlw95vDhw02fPn3M5s2b29e/+eabJgxD
s+uuu5r333+/ff2GDRvMfvvtZwCYMWPGtK/fsmWL2Wmnncxee+1l4ji2jvHTn/7Uee925Bvf+IYBYL2X28jXY7X3RXNzs/n73//urH/11VdNQ0OD+dKXvmStT/p829bvFvp04c++Pdi6devwm9/8BscccwymTJli3TZw4EB861vfwtq1a/GnP/2pU473+uuvq/8CrsXo0aNx9tlnW+u+8pWvAGi9KtgV+vTpg5122slZP27cOBx44IGd9jhp7rzzTmzcuBHXX3+9c3XxnHPOwWGHHYZ7773X2W7vvffG9OnT25c9z8M555wDADj00ENxyimntN+Wy+Vw5plnolKp4LXXXnP2te++++Kiiy6y1l100UUYNmwY5s2bh7Vr1wIAXnrpJTzzzDOYNGkSxo8fr+6j7QqtNGvWLNTV1Tnrd999dwRBYK3zPA9f//rXAaBLH/s2+Xwes2bNcs7jF7/4BSqVCm6++Wb079/fuu2KK67AgAEDcM8993TKOdx222249tprMWPGDFx00UXYf//9sXr1akyfPr39qkw1q1atwlNPPYURI0bg3HPPtW779re/7Vw53tq5556LUaNGtS8HQdAeA9nW91wtnwXa66KhoQGFQiHV9v/3f/+HlStX4lvf+hYOPfRQ67ajjjoKp5xyCh566CFs3Lixff3WVwG3PubkyZOxYcMG637ffffdqFQq+MY3voGBAwe2r+/duze++93vOvspFouYNGkS3nrrLTz++OPWbbfffjsKhQLOO++8VPcN0B8f+XqsplAoqMV1Bx54IMaNG4eFCxdaEYtqdvR3C/U8/Nm3B3v++ecRRRFaWlqcjBMAvPnmmwCAZcuW4aSTTtru43Vm+Pzwww931u22224AWn8y6SoLFizATTfdhGeffRbr1q1DpVJpv60rf9ZbvHgxgNafrbR8YXNzM9atW4d169Zh5513bl9/8MEHOzmctiIa7aeattvaCnu2NmrUKPi+/e853/cxatQovPnmm1iyZAm+9KUvtZ/r+++/r76uli1b1v6/Bx10UPv6YrGI4cOHO+OB1jzST3/6U9x7771YtmwZPv74Y+snQ+18O9uee+5pPbZt2u5v28/ZUi6Xa7/P2+v222931v2///f/cOONN6bafsmSJQBgTeLa9OrVC4cccgjmz5+vbtuZ77lt+SwYPXo0Bg0ahJkzZ2LJkiU46aSTMGbMGBxwwAHb1Bqp7Xl644031NflP/7xD8RxjL/+9a/43Oc+B6A1tzpz5kw8/PDDWLVqFbZs2WJts/Xrru2xPfroo519a+sA4OKLL8Z//dd/4dZbb8Wxxx4LoDW7+dJLL+Hcc89V/7EpnXXWWbjppptw2mmn4eyzz8Zxxx2H0aNH19Ql4eWXX8asWbPw1FNP4R//+Icz2Vu3bl2qIrwd/d1CPQ8nfz3Yhx9+CAB4+umn8fTTT3c4ThYA9ARtWauttWWeoijqkmP+9re/xdlnn42GhgaMHz8ee+yxB+rr69uLB7TAeGdpe65+9rOfVR23efNma4JS7XGqdpv2L/y2/F1H69uC923nOm/ePMybN6/quW5t4MCBHX6Zn3nmmZg7dy723XdfnH322Rg4cCByuRzWr1+PH//4xzukz11H97/t/raF67vSokWL8MUvfhGlUglLlizB1772Nfzwhz/EAQccgAsvvDBx+7arWltfmdpaR/cR6J73HNB6xX3x4sW45pprMHfuXDz00EMAgCFDhuDKK6/E1772tVT7aXue7rrrrqrj2l6XH374IUaOHIm3334bo0aNwpe+9CX07dsXQRC0Z5+3ft21vf61x7ajx3X//ffHmDFj8OCDD+KDDz5A//79cdtttwGAc5W9I1/4whewYMEC/Od//ifuvvtu3HHHHQCAkSNH4oYbbrAqwKt55plncMwxxwAAjj/+eAwbNgwNDQ3wPA8PPvgglixZkvp99kn+bqHOwclfD9b2Yb4tVw6y7Nprr0WxWMSf//xnDBs2zLpN
+8m1M7U9V6+88op1tWxHev/996uub+sn13auN998My699NLU++9o4vf8889j7ty5GD9+PObNm2f97Lp48WL8+Mc/Tn0MAO1XL7e+atum7Qt8W86v7f5u3LgRjY2N23Qutcrn8xg5ciQeeugh7Lfffpg+fTomTJiQeLWn7VzXrFmj3t7Rc9zddt99d8yePRtxHOMvf/kL/vjHP+InP/kJvv71r7dXsCdpu+9z585NdbXp9ttvx9tvv43vf//7zs+2M2fOxO9//3trXdvrf82aNU40o9rjeskll+CJJ57AnXfeialTp+Kee+7BsGHDtqlA6eijj8bDDz+MLVu24Nlnn8XcuXPx85//HCeeeCJeffVV9edr6brrrkNLSwuefPJJHHXUUdZtixcvbr+ymQa/W4iZvx5s5MiRqVsJJGn7Qu7KKwCdqZbzXb58OQ444ABn4vfee+/hrbfe6tJzaquM7YznqlZPP/004ji21sVxjGeeeQae57VXinb2ubb9zH3iiSc6ebsnn3xS3SYIgg6f27Zc27vvvuvcJltapNF2f9t+VtyRBgwYgBkzZqCpqam9fVE1bc/RM88849zW1NS0TV/wHenKzwLf93HIIYfgiiuuaM9RzpkzJ9Wxt/V12fa62zoX20Z73bU9ttptHb1OAeD000/HgAEDcNttt+G3v/0tNmzY4OTk0qqrq8PYsWPxwx/+EFdffTW2bNmCRx99tP32au+L5cuXY6eddnImfk1NTXjxxRed8dUe6878bqFPJk7+erBddtkFZ511Fp555hn84Ac/UNsuPPvss6n6lPXr1w+e5+Gdd97pcMyyZcs6Lfu0vdqyNNXOVxo6dCj+9re/Wf+Kb25uxle/+tXUQehaz+mCCy5AY2MjvvOd72Dp0qXO7U1NTV0++fjrX/+KW2+91Vp366234q9//StOPPFEDBgwAADw+c9/Hl/4whdwzz334De/+Y2znziO23saptF2FUX2Blu6dCmuv/56dZuddtoJ69atU/urHX744fA8D/fee691+5tvvrnNVxEB4Gtf+xrCMMS0adPw9ttvO7evX7/emVQuX74cy5Yt65TXzdSpUzF48GDccccdWLFiRdWxQ4cOxahRo/Dyyy87z80PfvCD9p/rtkdnfxYsXbpUvXLWtm7rtk7V3kOnnHIKdt99d/zoRz/CwoULndvL5bL1GuvodXf33Xe3//S8tXPPPRdBEOBHP/qRdWV148aN+I//+I8O718+n8fkyZPx2muv4eqrr0Yul9umP423aNEi9XXe0ePT0fti6NCh+Oijj6zPlyiK8M1vfrO9mGtr1R7rWr5bNmzYgGXLlnXY95A+Wfizbzd65ZVXOvwQ2X///XHllVfi5z//Od544w1cccUV+PWvf40jjjgCffv2xTvvvIMXXngBb775Jt577z3U19dXPVZDQwNGjhyJhQsX4rzzzsOwYcPg+z7OO++89g/RAw44AEDn9PnbXm3Nna+++mosXboUffr0Qd++fav+TDlt2jRMmzYNhx56aHtV7KOPPgpjDEaMGLHdV02OOeYY3HfffTjjjDNwwgknoFgsYsSIETj55JPbK0b/+Z//GSNGjMCECROw//77o6WlBStXrsQTTzyBI4880vkTX51p/PjxmD59Oh566CEceOCBWLp0KebOnYudd97ZmTTdc889GDduHM455xzcdNNNOOyww1BXV4e3334bixYtwtq1a9UvIM3nP/95fP7zn8f//u//4r333sMXv/hFvP3225gzZw5OPPFE3Hfffc42xxxzDF544QWccMIJOProo5HP5zF69GiMHj0agwcPxpe//GXcfffdOPzwwzFhwgSsWbMGDzzwACZMmFC1ga/moIMOws9//nN89atfxX777YeJEydi7733xqZNm/DWW2/hiSeewOTJk3HLLbe0b3Psscdi1apVWLFixXb/rdVisYgrr7wS06dPx7//+7+3Z746cvPNN2P06NH4l3/5F9x///3Y
Z5998OKLL2Lx4sUYPXo0Fi5c6BT2bIvO/ix49NFH8a1vfQujRo3Cvvvui/79+7f3nysWi+0V30Dr837jjTfi4osvxhlnnIFevXph6NChOO+881AoFHDffffhhBNOwJgxY3DMMcdg+PDh7c2Pn3zySfTv3799UnreeefhhhtuwLRp0zB//nwMHToUS5YswWOPPYbTTz8dv/vd76zz3GeffXDNNddgxowZOPjgg3HWWWchDEPcf//9OPjgg/HGG290eB+nTp2KG2+8EatXr8YZZ5zRYSZTc8MNN2D+/PkYPXo09txzTxSLRbz44ot47LHHsNdee+G0006zHp+O3hfTpk3DH//4Rxx11FE466yzUCwWsWDBArz77rsYO3as06T7iCOOQF1dHW666SZ89NFH7f/4a/uJfFu/Wx544AFccMEFmDRpEv9e8KdBd/aZyaqte7x19N/W/aaamprMrFmzzOGHH2569epl6urqzJ577mlOPfVUc+edd5pyudw+tlpvpzfeeMNMnDjR9O3b13ie5/TRwzb09jKmep8/2Ydt6/st+4t11Ntq9uzZZvjw4aZQKBgA1hit514cx+aWW24xBx54oCkWi2aXXXYxF154oVmzZo06flv7/JXLZXPFFVeY3Xff3YRhqN6XZcuWmQsvvNAMHTrU5PN5069fPzN8+HAzffp0q89btV5r1R5D7Zy3Hv/kk0+aMWPGmF69epnevXub0047zbz55pvq/fnwww/Nd7/7XXPQQQeZuro609DQYIYNG2bOPfdc87vf/c4aW63/mDHGrFmzxnzlK18xgwcPNsVi0QwfPtz87Gc/M2+99ZZ6Pzdt2mQuuugiM2jQIBMEgXN/m5qazPTp081nPvMZUygUzMEHH2zuuuuuqn3+tn7PaJ577jlzzjnnmMGDB5tcLmd23nlnc9hhh5krr7zSvP766879la/tajrq89emubnZ7LrrriYIAvPGG28YY6q/Bl566SUzfvx409DQYBobG80JJ5xgXnnlFXPSSScZAOajjz5qH1tLX8TO/Cx47bXXzGWXXWYOPfRQ079/f1MoFMxee+1lJk2aZJYuXeqMnzVrlhk2bJjJ5XLq8/b3v//dXHbZZWbYsGGmUCiY3r17mwMOOMBMmTLFPPbYY9bYl19+2Rx//PGmX79+prGx0YwZM8b86U9/qvqY3Hrrreazn/2syefzZrfddjPf/OY3TVNTU+JrqK3/ouyNmeSRRx4x559/vtlvv/1MY2OjaWhoMJ/97GfN1VdfbdauXWuNTXpf3Hfffeawww4z9fX1ZueddzZnnXWWWb58eYf9E+fNm2dGjhxp6urq1Oe0lu+Wav0h6ZPDM6YHXOYhopotWLAA48aNw4wZM9S2DfTpEEUR9t57b2zZsqXHFn58WjU3N2O33XZDQ0MD3nrrre268krUE/AVTETUg1Qqlfa/7by1mTNnYtWqVfyTW93gjjvuwAcffICpU6dy4kefCsz8ERH1IB9//DF23XVXHHfccdh3331RLpfx7LPP4vnnn8egQYN4dXcHmjlzJtauXYtf/OIXGDhwYOqehUQ9HSd/REQ9SH19PS688EI8/vjjWLhwIZqbmzFo0CBMnToV3/ve91L9BQfqHFdddRVyuRxGjBiBm2++ub1XINEnHTN/RERERBnC8AIRERFRhnDyR0RERJQhnPwRERERZQgnf0REREQZwskfERERUYZw8kdERESUIZz8EREREWVIqibPcRxj9erVaGxshOd5XX1ORJRBxhhs2rQJgwcP/lT+CS1+jhJRV0v7OZpq8rd69WoMGTKk006OiKgj77zzDnbbbbfuPo1Ox89RItpRkj5HU03+GhsbAQCHH/tbhGF955wZEdFWKpUm/Pmxf27/vPm04ecoEXW1tJ+jqSZ/bT9RhGE9wlyv7T87IqIOfFp/EuXnKBHtKEmfo5++YA0RERERdYiTPyIiIqIMSfWzLxERdQ+vhspnz9/2n849r3OuBdR0
7C6q7vZrOBfaPnFsumS/Jo5r2Gb7z8WYbT9urceu5T7Wilf+iIiIiDKEkz8iIiKiDOHkj4iIiChDmPkjIuohtOxbmgydzOul2kYcK00+Lk0uUB47TZ6vlpygX8O51Kqz9vNJUmteLmm7uIsydFpeLs19SMr0aRnGNNk84yWcr3rc5Nd0Z+UCeeWPiIiIKEM4+SMiIiLKEE7+iIiIiDKEkz8iIiKiDGHBBxFRN5HFEFphQZpijjTFG0n7SVNsUkuRRa33SfLlY5Xib0DX1nC6tuKONI9NT1JL4UWqAoqkwgzj3h6nKaAQ+5UFE9px05yvfBzkNn6KQhKteEMWijiFGrHyevGT71Nn+WS9WomIiIhou3DyR0RERJQhnPwRERERZQgzf0REPViabF4QBFW30bZLt404tpJrC0KxHy9F5i9FljApe6dnCcV+a2h2rY7ponxhd6olm+fuI0XWLdV+ZD5OyQUmZPO0baIoShzj17BfeS5xJXLGeKLJsxGvD615dFyRJ5fcCLrWps+88kdERESUIZz8EREREWUIJ39EREREGcLJHxEREVGGsOCDiKiH0Aoqkgo1WseIQofQHZNU4OEr+01TFJI0Rt9GNnlW7ndC4YjeyDr52H5C8UaaApU00hSJ7Ci1FF2kGRMr+3UbIIttUjRj1s5XFjZEUXJDZD+SxRHJx05TJOJFboFH0n5lMYevFHPIohCtEbRB8rHT4JU/IiIiogzh5I+IiIgoQzj5IyIiIsoQZv6IiLpJmnyczLapY0TGT8sFykyfbM7sB8nNo/1QG1M9v6fvV26TnM2Tx9Yydc5jpYxxG0xXv12TZkxStnBH0rJ5Uprmxu5+lXUii5em6bObu1PycAlj5HEBIIzt16+237hir5PPrbbfNO9buV/3dned58vnoHPyfRpe+SMiIiLKEE7+iIiIiDKEkz8iIiKiDOHkj4iIiChDWPBBRNRD6M2ORQFFigbOQc79aE8q3gjVbWTxhnLssHqBh9yHNkZr2OzuZ9sbOOsNphOaPGuFJM5+q+6iw/04+62hebSkNU1OQxZeGFGfoDZwTtOwWaxK04w5isR+tcIM2TQ5qt70WRvjK0UYcWAXVXhl+zmJfLfowqts//Mmm1YDgG/s/TpNnwF4ckz1upIO8cofERERUYZw8kdERESUIZz8EREREWUIM39ERD2Y25RYy9AlN3mWmb40mT+ZuwtzWt6w+hi9ybMnlrX7JO63eBwCLR8pVmn7lVG8pKbPrdvIc0kek3RuXSlNDszN71W/HXCbOqfL74llZb9uw+bkMZWyyPOlyPx5npvfi5yGzaKZeFlp4Oxte9BOPlZadleOkU2fAQBR5zR+5pU/IiIiogzh5I+IiIgoQzj5IyIiIsoQZv6IiLqJ08NP6+vlyz5/SoYurN7DT1snM35ani8MZf9ALUso84Ze1du1MVp+T2b+3Jxgcg8/LYfnjkHVZcB9XtQxCa3ftJ6DachjKZG5RFo2z8nvieVUPfy0MSIOJzOA2jYyv6dm/kQoMQiS+/zJ/ap9H0Xvv0olOVNXQcVa9k3ydTT5HKjPiTy/zon3qXjlj4iIiChDOPkjIiIiyhBO/oiIiIgyhJM/IiIiogxhwQcRUQ8hC0AAt2GzNkY2Uk7T5FkWYuTy7teBHBMqhSTOmJwoLNG2CeX5JhdmyG20RstuI2hljGwenargI3mM0wg6RX1HQl/omrlFIUqjYqfgQxRmRO42bsGHe2ynwEM2eVa2qYT2seQ2AFCpiNeMb++orBRqyAIPreCjojR+7gyyKMSLks/FKe5SxsTO+7+28+eVPyIiIqIM4eSPiIiIKEM4+SMiIiLKEGb+iIi6SZpMklwnGzoDbsZPbdickPHTtsnlk/ebT8z8ufdJ5ve0MW7mLznPJ6OOWmZKbidzd0pcMlUuUK7rWZk/l8z8xbFX9XYAiES8TMvvyRygzA5qPZQrFTGm4h5cNnV2sptl7TUktlHyffJ5Krmn55AN
sIPYfTHGMqMoXlixfDBReyPwWvDKHxEREVGGcPJHRERElCGc/BERERFlCCd/RERERBnCgg8ioh5CNnltXSeC7Uqlgy8bIGtNnsPqxRuyuENbl08xJicKM3I5rclz9WKO1v3Yy7J4Q6l7cYo11KKQpIIP3y02kGPURtCeqTpG38Zd1xm0Yg3JbdjsVb29dYw4TuzegYoo8CjbvY7VIpFSWexDeT3IohC/bO8oUB5MreBnW2kFIKF4cIzygPuhKDYx4j0auS9gv2JvozV07yy88kdERESUIZz8EREREWUIJ39EREREGcLMHxFRDyZzP1oj2EAE2bRmzIFsxixyglrmT2b8tDGFgr2ffIomz/mcvRwG7pgwlGOqLwNuXk+JUDpjnAwglAbDTk4wORfoo3oGsKP9dAVj3IM7mT8jM3/uNjLzV0mR+ZMZUK3Js3yNyAwgAJSd50Bk6LrosdTyk0Y8eLF8YADEkX1+sumznyLfm6bpe6145Y+IiIgoQzj5IyIiIsoQTv6IiIiIMoSZPyKibiL7+ql5PhFuk38gHnD7+mmZP6evX8H++Nd6+BUKMvPnXi8o5GXmT/b5S5P5c4YgF5qqY8JAyeZ5cpvkMWn6/Lk9/JQxCRk/NSeo5AvdYycOcaTp8xeLvnNuBtB9rmMj83tK5i/2xbLI81Xcbcpind6fUYwRrwftcaqlT56T51MaHso8n+yhCQCR6Nkn+/555a7L86XBK39EREREGcLJHxEREVGGcPJHRERElCGc/BERERFlCAs+iIh6CF8JqHuicsBX0vCymEMfI0PqyU2eZYGHLO5oXedVXZbFHQCQC+WyG6rPy4IPXy67jXVlgYfW+DfwRBBfNn3WijnENlqhhizokI9UmoKPNE2f0xSJGCQXDrgFH/Y2sulz6zr7NVJRXmey4KMsiyOU5salQBZ8uMf2ZQNkrWt2AqMUsUhxHIhlpeBDvHeiyN1vIMZURIGHHyrv9bIsAOu663O88kdERESUIZz8EREREWUIJ39EREREGcLMHxFRN3FyTCn+kLts+gwAgcheqU2eRcZINnXO57Q8n8z8uedXLMjMn327zO61Hstel1eaMecCO2cnM37ydsDN5mm5QDlGZvzk7do6LXcns4ROnk/LCXZR5k/SMoAy4xeLa0EyEwgAFZH5kxlAACiLzFxZdGMuR8prs2IfK1Cyr77Iv/lO5q+2BsmycXUkMouxcr4V0bA5qCjnK96T8n0blSNnG/le1z8POueaHa/8EREREWUIJ39EREREGcLJHxEREVGGcPJHRERElCEs+CAi6iG0gHcQJDdwlg1jZUNnwC3wyIkxuZx7bFngIYs7WsfYy8W8XZBQCN0CCrlOK97Ih3YgPhQFFTnfDcwHYp1WvBFAFnzY22gFFXI/Ptz9ugUeNWxjksd0FlkEEnuiubFSdBGJKUPFuFOIsm+vK8d2l++S524TiGN7yhi3p7MsUNEKPsR9dB9epcDDfrxj5b0kC1S0ZteyCEs2pdbe67KopSvxyh8RERFRhnDyR0RERJQhnPwRERERZQgzf0RE3cTzZC5I+WPvssmzki8KRQNZ2dBZW5eXDZwLWpPn6g2cATfjV8zFYtnN5hUCe10+cMfkg4q1LLN5Od++HQAC2GPkNoCbvfPFGF/L3Yl1vlH2a6pnBz2jZAnlNuqxq2cJVfJYbmAORlz7MWJM5NtZvdZ1IvPnuy+IspcXy/J5dLfxPftYyunC89xmy1szSlNqp5F17O44Eg9nFNn7KVeUBuTi/VbJuc9JpSybPIv3upL5k3zl86Cz8MofERERUYZw8kdERESUIZz8EREREWUIM39ERD2E2vtL9gtTewHa62QmCXAzfrKvn8z3ta6zl2W+DwDq89UzfoXQzeYVgrJ9bkp+L+/bY0KRHQth3w64mT8tmxfEZTHGPn8tdyf3o+1X5uxS5fliOcZ9fL3Yvt9O3z9lv7Uwss9f4Gb+5LpKUHDGlMW6UlBnLftKT0fZ
R9HztN6G9ovRyfMpff7kwxkpmb+KeCoroXwvKdsEyTncxD5/SrBRW9dVeOWPiIiIKEM4+SMiIiLKEE7+iIiIiDKEkz8iIiKiDGHBBxFRN5HFG9ofdvfTNHnOiSbP2h+jl02eRcFH3s33o5Cr3sC5dZ2dmK/P2QUVsrgDAAp+yT62WAaAnGevC0WhRhi72wSiOMKPlWbMYj9OM2Z1G7EuRVGIHKPt11mXpuAjEtto+1X2k8T49mvIhO70IA7tYo4wdAs+wny9tRyIgh8/VM7XTz5fWeDhFHMoBR9RLF7zkVK8IQo6yuJuaw3TA1G0IguugOTm7PJ9rW2jFoClaA6dBq/8EREREWUIJ39EREREGcLJHxEREVGGMPNHRNRDaE1e/SAQy0ouUGSOQqUxbVJT54KS+ZNNnWW+DwCKoZ2hKwZ2Fq8uaHa2yYs8X94oY8pbrGWZ8QsiN/PnRyIfF7vNoz3ZfDlN7i5Ffs/J+KXJ5skmz9p+k/ZTcTOVzjYpGkF7nnhdKZm/IF+0T6VQ74zxiy32ckE21Vbuo3ztKZelYvG6j429XIndjWQz5kroPrfyvZITd7usvJdkDjBQsroyB+jkewO3EbuW8esqvPJHRERElCGc/BERERFlCCd/RERERBnCyR8RERFRhrDgg4iom3giKK4FvpOaxQJALhRNnpXGtE6wXYTs8zk3DF8UDXnrcm4BRX1YvcCjzmtytilE9rp8ZYszJhQFH0HF3q8XuefiR6KBc5oCClmooTVIlgUTSgGFU+Ah96Oei7gPFfc+OQUdYtmU3W2MM0YpConF+ckGw/IFAsCvqxPLvZwxXoNd8OEUtSiPrxHFJkZpUh6JJtSRKJioGHebcmSvKyvNmENZLCWWA6UBtVvM4QxJ1bBZ8r3kz4POwit/RERERBnCyR8RERFRhnDyR0RERJQhzPwREfUQWsZHZvy0PyKfpslzXmb8xKd/IXRzbAXR1LkucLNjSRm/uvImd7/lj63lXMvHzhi/ZO/HK9vZQr/iNnlO1dzYyeIl5/m0xs/bfGx5O1Bbfq/Fvt9xs9sgO2q2c3dRi/tYxSUlB7gVX75gAOQa7KbOYd8+7nbieQq1+y3kZZ7Pd6cmlcA+n7IYk/Pdpsk5kQvMKXnZMLCfW99pxqy9J5NzuO77dsfl+dLglT8iIiKiDOHkj4iIiChDOPkjIiIiyhBm/oiIuonzx96VhmHOGK2Hn+iLpmX+cmKd7OtXzLlZt2JQEcst7hjf7sdXV7IzfsWWDe65bLHXBVvcXKDXInr/tYhsm+x/B+i5OknL9HUG0TfPiHMxSl9C2dcvVvrxyYyfzPNVNrs9Ekub7Lxky4bNzpiWTfbjacT5B3k3Q1e/c29rudcu7ushL+53IF7TQahkCcO8tRyFBWdM2bfX5T2ZAVT2G9jPtcz3ta6T52IvK3E+eOLtpeUC5RhPrJDZQkDpDah9HmhNBWvAK39EREREGcLJHxEREVGGcPJHRERElCGc/BERERFlCAs+6BPlqkcurmm76yf8spPPhKjzaY1ffS9FQ1k/ucmzDLLnQyOW3WKJgmjqXPCVgo+KXUwgCzzymz9wz3fTR/aKjzc6Y0yTvV/ZzFgWVKTlBOZlyN6v7ZqIkc2iEwpAAMCIAo9YafIcifstCzxKm9yCj81r7cdz47tu0c2aZz9y1iUZNHqAtdxfaR7dWywXcnYhhl9wizmCfNFaDnN1zphcaK8LA7GN7z52oSgCCTy34CPwqzd5Dty6F6fJs9avWe7HLeZILvjoSrzyR0RERJQhnPwRERERZQgnf0REREQZwswf9Wi1ZvyS9sMMIH1SyByQzBtp62S+D1Ca2YoGuDnfzaTlfZH5M26+rFD62N6PbOC88UNnG/PhWms5+sjNn5XX242fIyVfJjm5KnmnAfgig+aJUJe2jZbPSuI0eY7dvJls6hwr97Gyxc5ZljfbGcCmD9wG2R+vsZ+TWvJ9mvcWrk0cE9bZmb6wod5a9ns1
ONt49Y3WclBpdsaEkf3YhCKPGnru6zcQ60LfbfAd+PbzLWN3Mk8LAJ7IDqr5vYTXzI7M92l45Y+IiIgoQzj5IyIiIsoQTv6IiIiIMoSTPyIiIqIMYcEHZRILQKgn8EXDYS0EnqY5rNN0VtlPGNgh9VA0t80HbpPcvG+H7PNKED/fYjcUDjevt5ZlcQcAlP/xvrW8+d01zpimtXbhSHmLHfAP8u7XV67OLuaQxQcAEBbz9n4Koglx3l4GAF92+k0T1hcFHrHW5Llir9OKWspNLQnL2jZlZ11X0ApAGnex2zzXD2yylsNmt2jIK9mvKy9yX4t+bK/zjf3Y+VCKOTzZwNktuvETxqR7vzlDnObstdR3dGVRCK/8EREREWUIJ39EREREGcLJHxEREVGGMPPXydI0JWa+jIg0aZrFyixR6xh7OVD+We9m/pKbPIews2O5ipvXClo22ys22A2FK2vXOdtsWvWetbzujfecMX9/7H1n3db6jejtrCv2sTN+9TvVK2PqrGWnKbHIBAKAL/KF6vMkgl8mth9fY5QmzyU7xxaV3KyezAGWt9jLUdnNukUl97ncUSJxn2Qja9n8GgA8sc6P3MchiO11TuZPaeAsmzH7cJ+DQGT8ZMxO69Us1+mvB7mc5n284xo/88ofERERUYZw8kdERESUIZz8EREREWUIM3/bKU3Gr5ZtmAskIsDt2SezRIDbd0yLDsleZDLzF3hK5s/YOatQ6fPnN9uZv3iT3Z+vee2HzjYfvGnn+ZLyfZqPlmxMHPOZI3dy1hnRf68olqFk83zRj88PA2dMEpkBBIBY7Dcuu/3t4kpcdVnbb3dK7E2nna+Jqy8DzvMiM3+ekueT69JE6uQYrYefs80n8DLaJ/CUiYiIiKhWnPwRERERZQgnf0REREQZwskfERERUYaw4GMb1VLgQbWThS+d9fizoIY+qdI0gtX/0Lzcj70ceG7IPjB2AUIQlZwxXqnFWo622I2gt6yzC0AA4J1H3abOXaG02S2gyPeyi1iCvF28oRUsBLEsunALPpIKHWShSetKe50sAGndrnpBh2wuDQBh0f5qr9+j6IxpWukW72wrraBGNtH2C6JpdqAUy3RCxYRa8OHJgg9tzLYtA+57ST2fHdiwuRa88kdERESUIZz8EREREWUIJ39EREREGcLMH32iMKtHWZMmO5Qmp+Q2vFUyaHKbNM13RSbNiEbFUcnN3e0ocdnN0DmNlZ0myu42Ms+nXjUx234tRR5LywXKdfJcgpx73EKjnbPzA/cFkW+o/vUvs5Ct29j7bdyltzOm2K/R3k+dnTf0cjnlYPaxjK+cWw/P0CVJbH69g/HKHxEREVGGcPJHRERElCGc/BERERFlCCd/RERERBnCgg8iIlIZ2XxXa8Yrmgx7Irzvhz37GoMsqEhTdKE2bEb1Zsz6fqtvo3EKPvLJX+N+mFy8IZtF5+rcwox8L3ubYr8GZ0yusd4+vzq76bOXE02fAcAX56cUd8jXorOMnlVQ0dP17HclEREREXUqTv6IiIiIMoSTPyIiIqIMYeZvG8kmw1c9cnE3nQkRZYExyc2Y5RBtE5mJMqb6srZN7LnZMYgMl9/LznzVDejrbDLkuEHW8juPvufutxOERfcrTubfZCaxpzXjlecjs3lqP2TfzusFefcF4TSuFo+LlvnL9bIbNhf6uJm/sKGXfZxiwR6QF8sATGgfywTuseVrz3k9K5m/NK/xrOKVPyIiIqIM4eSPiIiIKEM4+SMiIiLKEGb+tpPMAGpkLjDNNkREGi0DGIseclr7uEisi0T+qWLcPF8lsPN8Ua7ojInzdh83v6HRWi4O2MnZpv+wLdZykHOvQ3y8ZrO1vO6F9dZyn8/a2TIAyNXbWbG6fnXOGNmrLizYX4NaTzy5bkfmAmXGT8bhTJx8Ltr5OvsV/QLDOjebJzN/sqcf4Pb184viNaP0+ZMZPyP7/gEwIvPnZABTZFbVLGxCXlbbRm3zmEDvDdl9
eOWPiIiIKEM4+SMiIiLKEE7+iIiIiDKEkz8iIiKiDGHBxw7AAg8iqpUs5jBKMUeakHokCgOi2P63f6QVfMAO4lcCtwggKtqFF35Db2s5198t+Ggol63lIO829a3vv9Fa7j24tzNGkg2bZTFH6xj7fspChzTFEWkYreqmE8hz8VKcmnxcWteJx6EgCmHq3ec6rLOLN5xiDgCe2A9yYj9awYdYF/tKk2e/esFHbNz7KItA4lSNoJ0hifT3ZM8q8JB45Y+IiIgoQzj5IyIiIsoQTv6IiIiIMoSZPyKiHkLLCcl1sTrGXpYNnQG38bPMAJZjJfNn7K+IUqg0Tc7bmb+g3s7m+X2anW1y4mS8nJvxkg2F6zbb+4lKdm4QcBvppmnG3Fl5PvfYvrg92ubj1Epm/NTG1TnR1LloPwcyAwgAgWj87OfdMZ5cF9rHMaGS5xNNnmW+D1AyfuLalWzoDLiNzLVG0LL3srOcImP7ScQrf0REREQZwskfERERUYZw8kdERESUIZz8EREREWUICz6IiLpJLLrDyqIBjVYUEkWiKETZTyUSBR6R/W9/reCjZOwgfilwm/qWc9ULPnJRxdlGXnXIBUoz5qJdXJBr2mItR1tanG3iyC6qMBW3yEIWaziPZ4pijljdb/UxWvGJ1hy4K2hFLbIIxAvsZVkQAriFOU5DZwCebOKct59HE7gFH5ANnH332LHoZh3D3ibSmjwjuYGz2yA9xTay8bpWhBXL5eTCrR2JV/6IiIiIMoSTPyIiIqIM4eSPiIiIKEOY+SMi6sHiih0ekvk+bV3ZjdlBxtQqsslz5Gb+yrHI/PkFZ0xLrt5aDuKStewpmb9QBKK0TFooMmdx0c4b+s1u82jTYh/bRG42LxYPjswFytwgAMSiobSW34tKyoO+jbTMp9ZQ2j6X2q7hyO3kstYYWuYC5TIAQDbsDmSTZyXPJzJ+RmnyHPmiEbS4dhUrmT+5TjZ91ta5GUBnk1SNoGvJ9GnZwa7CK39EREREGcLJHxEREVGGcPJHRERElCHM/BER9RBa5ktmh7QxkewhpuQCyxU721SqVO/7BwClyP6KaPHdvm45v85aDnJ29s0ruhk6T2T+As89thfaGS8/t9leLriZv7jFXiczgADgV+z8XizGeEpgUmb8omZlv2H1Pn/GfRhS9Q+Uz6XMAKp5SfHV7oda70K5H5GH054TmfHTMn8ir2cCuez2+YvFOpnvA4DYE70ATXLmT+b54ljptSjeKlEsl5UcppMLTM7qyd2k6umZYkyteOWPiIiIKEM4+SMiIiLKEE7+iIiIiDKEkz8iIiKiDGHBBxFRN3H/2LsWzLfHRDKRDrcooFxxg+KylkAWgLRUlIIP0eg3r4T1Wzy7+XIQ2gfylUoHWfBhlOKCQBQO+LLxr9Is2BfrTN4tzDCyObRsXKwUichiCLUZs2gOLQsx1GIe8aREZfexikpKpYh1nOpNoFvHuIUOQcEu3klVXCD2ozZ5FoU6ssBDa+DsFny4z22EoOpyRWvyLAo81CbPcQ1Nnp33rTtGFoF0d4GHxCt/RERERBnCyR8RERFRhnDyR0RERJQhzPwREfUQapbMyMyf0uRZ5AAryphKReYCZZNnNw/VEtm5qlzkZv4Cz86kBYHI/IXJmT+VJ85HZvOUzJ9sDO2V3EbQSY2KtfyhfA58rRlzSWbZ7GbSsqkyAERl8bw1l50xlRb7WDInqOX5cnXu8ySFRbuZtXZ+ktNQWsnvOY2fnabPyQ2cI0/L/NnrohRNniuxvU7m+wBA3m2nybPWnDtFw2aZC3QygCkaQ3clXvkjIiIiyhBO/oiIiIgyhJM/IiIiogzh5I+IiIgoQ1jwQUTUTWTIXguORxV7TKw0eZZFILLpM6A1eZbLbhi+VLGD+M2+u9/AswP8oSgA8ZUmxF6YXFzgFF6IZV8WhADwxRi1CbHcj7g5VoL4vkj9G6Xgww/t5tBpGkNHJftJKG12G0y3bLLXVZrtbbzA
vYZTaBTnqxxbFoXktcoGySnCUaYQssBDFOHEvlLwIZo6R9oY8UxFRjR5jt3HQTZ1jlM0eZYPg/J2Q+QUc7hj5HvQaQytFXc5+1WavqcplkqBV/6IiIiIMoSTPyIiIqIM4eSPiIiIKEOY+aMd5qpHLk4cc/2EX+6AMyH65JDNYLWsUEXkAssVd0ypJMbk7X/7t5SVZsGhvS6UTX4BhCKfFXj2sT3PPRdPZgfTfBN58lxS5PmUhs3uvbSpV0RkNrPsNmP2QtGEWjxWWu6uvMXeT9OHblPq5nUt1vKWd1ucMVLDPnXWclx283y5+ry1XOyXIksmn38lU+lk/AK57D7ZlcA+lwjufitG5AJj0RhayfzJHKAS1XSbOotltYFzJBuvu4+dzI46TZ6V/e5IvPJHRERElCGc/BERERFlCCd/RERERBnCzB91ijR5vs7YDzOB9Gmm/bF32QtQ7/Nnr6uU3TGVyP63fknE1nLKt0FLmsyfyH35XiiW7TwXAHiBuJ/KZQiTE5k5r/qyRhsj+wM6I5Q+ap5o/ubLJokAgpLs89eUeH4y8/fRko2J26Tx8d+2VF0GgGJfkQvUAnGC0zdRy/yJjJ8RGb8ocF8PkXjNRMrUpCL6+pXj5D5/lUj08IvdxKe82zJTq/X5i51coDvG6dmXIuMXm+S+n52FV/6IiIiIMoSTPyIiIqIM4eSPiIiIKEM4+SMiIiLKEBZ8EBF1ExnojmWSHG4xh9pQVjR5lk2fAaBSNmKMvazUMKBcsQPyLYF7vUCu8mDv10/qqgxA6enrdmMW31YmsV0zEKYYI/lK0Y0s+PCUJs9+s13I4IsKGi/VA9F95PnJptWt6+xiDuTc4g0j1sVBwVqu+O42cp1s6AwAlTgUY0QDZ7XJs32fZAEIAFTE616+D+T7BAAipyhEe99WL/jQi7t2XONnXvkjIiIiyhBO/oiIiIgyhJM/IiIiogxh5o+IqIfQMj9OLlBt8pz8h+ZlDrBcFk2fQzcPJWNfgZJbk+t8397IU6JuMheoEsc2YkdGOV8pTS5Q8rTMn8h0+ZHbENk0t1jLYS+7iXJYdLNuQa77rr/IYwd50Zy7aGf1AACForVo8kVnSByK/F4oMn9Kk+cy7HVlJfMnmzqXI7mc3ORZ62Mt3yqx815yXw9pxjjvWyMzgO57NE1jaNmEula88kdERESUIZz8EREREWUIJ39EREREGcLJHxEREVGGsOCDiKibGBNXXQa0RtBu4LsikuyVitugVxZ8uAUgbnFEWXxDtChjfNkc2Eu+puB52x5aN4Eo+FAqSYwoYjDKuch1uRTnG4jnxYvdygG/Yjd+Dkslaznf+LGzTaHRLZjYUeSxcw29rGW/zi5YAQAU7XVxwR0T5eqt5XJojykF7n0ux7mqy63rqhd4lJQGzrJJecntza00O08u5qiUkxuvy3WyUEvbZkfilT8iIiKiDOHkj4iIiChDOPkjIiIiyhBm/qhTXD/hl9byVY9c3CX7Jfo0U5u6murZIQCIZZ6vnNzkuVQSjYsDrYGzfX1A5vtax4gmz2KIr2bq7K8eY5T8nmjQ7Cwr5yLXaY2gnayg1oU6gZuodBtBh6IRdLHsBs56N9vrhhxXcsa88+h723x+0u7jBzvrGnftby3n+ve1lv0+9jIAmPpGazkq9HLGlPN25q8kMn9l4zZ5LomMXylypyYlkWNtqdivK5nva11nL2tNnstlI8aI5YqS5xPPtXz/AYAx1bO6akN3uY2SAe4svPJHRERElCGc/BERERFlCCd/RERERBnCzB91CWb1iJKlyQHJPFGkBJeSeooBbg6wEoo+f6UUeb7APT/fycwlZ+hik3zdQcSfIB8a2fcPACJxPSPW+vyJHGDs2Vkyo1wTMb5I+Sk5QZkD9ERe0u1cBzSIMWHRHdV7cB9rOSpVnDGS3E+vXfo7Y4q7DLC3GTDQWja9+znbVHrZ51Iq9HbGNOcarOUWz878NccFZ5uWyD7fZiXz1xLJ
Pn/2c6Bl/mRfP5nva91OZPxkBlDL/EXJvQDl+1a+J9X3erzjev/xyh8RERFRhnDyR0RERJQhnPwRERERZQgnf0REREQZwoIPIqIeQmvq6jR+TfFH5CtltyikEtr/1i+JgglPK2JwxjhD4F5DSC4AcYo3lAIQI2ofZJNnrWikIJpFx4E7JhbnG/n212Ccd1s4x6LgwykAAZAL7BMOxLIfusUc+aJdDBH2bnTG1A1uso8tm0X77n3083Yj5aCxwRnj9e5r71cUeFQa3IKPljp7XXPePd9m32783BwX7eXIbfLcXLEfm5ay+/g2l30xxhPLziZOMUdJK/hIKPDQCj7k+0u+/7R1RjaG1t7raRpBd1JRCK/8EREREWUIJ39EREREGcLJHxEREVGGMPNHRNSDycavanNYkS/SMkgyu+SL5SBwtymVknOBgNxONFo2WubPXmeUMbLJcxTbY6JQafIs9hMZJb8ncoAVkd+LlDxfJW9n0iq+m98rBHaWLRfaWbegaGfhACCot5sk+302O2P8cos4GdHkWXtORL7QiGwhAER1dl6vUrBzgaWi28B5S87eRub7ADfj1xTZTZ23lN3M35ayPRVpriiZP/FalMstJfd9kabJc1LGTzZHB9ymzmlyuG4GUMnzKTnArsIrf0REREQZwskfERERUYZw8kdERESUIZz8EREREWUICz6IiLqJbNiqF3PIhrJuA+eoYq+rlN1/1wei0EEWb/h6B+dEWoPmpNudYo5IKd6IRTNm8dhUYncbuS4K3WNXxH7zgV1cUPHdr8VKEIplt+CjHNiFDvnQLrLI5d1Gy2Fxi7UcVFqcMX5UslfIB09hxPnGQcEZU87Z51fK1VvLLYG9DAAtsO/jlqjojJFNnLekaODcJNbJYg5tXYt4WOQyALS02O+vUkkrapJj5HtJaZheTh4ji0Dke1srEnGbPHddAQiv/BERERFlCCd/RERERBnCyR8RERFRhjDzR0TUQ2gZnzRZoUhkjoJQaVQsxni+aOCs5KzSSIqgabcbmc1zI3SIxZiKyAUWckrmT4wpR+71jXxYPfOXD9yvxVJsr8v7bqPivG/n6nKByPwFbigtzNtdiMPYHeOLxr+eWDaeex9jsS5SmlKXPfs+lGEvt8TufSyJPF9z5D5WzSLj1yzzfGX3eWsRGdVmLb8nmjg7mb8W930h17W0uNm8snhflGXmr5L8ftOaqsscrszqyubtgJb5UxpBK+tqwSt/RERERBnCyR8RERFRhnDyR0RERJQhzPwREXWTWjI+WlZI5okq5YozRvb1k5m/Whkjz8/OeEWRe59kD79yRcvviWyeiGtVlG3yIgdYDt0xJbFOZgBzgXtNJCdygQUlF5gL7Dxc6NvPQc5382aBZ68LlTEeTNVlTSyu61SMmwGNxPNUEvk9mXMEgFLF3qYlcvdbqsj8nujPp2X+nJ597n2U68plkQFU8nyyh5/M97WOke+dWCwrff5knk95v8lsbiyyg7J/JwDEMs9p2OePiIiIiDoBJ39EREREGcLJHxEREVGGcPJHRERElCEs+CAi6iG0gLcMgatBcd/+d7wMlwNAxXND6dtKK0iJRGFG7DSlVhpOV+wxuZx7HaIiCkVKonAgn3e3kYUDsgAEAEJR8JFzCkCUbQJZFOLep1xon28oGiuHvvvYhb79PPme+7zVUpcjn6bYKI+vLLqJqi8DQEk00S4pxRslUYgjn5NyRSvmEPsouY+DLPAoicIM2Zy5dT/VGzgDboFHuVSputy6TXKTZ1l0JYuyurKBcxq88kdERESUIZz8EREREWUIJ39EREREGcLMHxFRN5EZP5mXA4BYNJSNPfff7JFoFiwbOtcqTRNqmfmTTZ0rOTcPFYpGwJWym6ELRQ5QZvVkBkwb06Ll95zMn7g9cLfJiexg6J4uwsA+n8APqt7eOsZe1ho4y6dSZgC1mJjouw0DrYm2XPaqLgOAjL9VlBipzPSVymIfSp5P5jvLyhi5
XaWSnPmT2TyZAQSASIyRDZzVJs8iz6c1VXeaPKfK/CV/HnRW42de+SMiIiLKEE7+iIiIiDKEkz8iIiKiDOHkj4iIiChDWPBBRNSDyWC4bPoMAJ4Ik0c1dAY2skpAWac1sw1E1YIMulfK7jWGMGcXQ2hFIUFZFnzYy4FSmFHLGFkAom8jxiiXTULxbRqI58BXnhPRm1vdryQLQJSnzaE8bZC9wuUY2YgbUIpEtDFinVOYoW0jGy2rRRbydSULNbRGy2maMYsxTjGHUiQi32/aGFmoJc5P7gPQCqw6p7hDwyt/RERERBnCyR8RERFRhnDyR0RERJQhzPwREfUQWsbHiKyYbPqcft8iTyTCYn7kXguQOSU/dMfIzF8UhVVvB9yMVDnFfv0gOc8nt5GNogEg8OV+kzN/cp02xtmPeN60vttyPzVENVWyN7CWC5QNhOVLTzbrBtxcXS1jtGyezOLJfF/rOpGhc7bRsnmy0bK2XzvjlyabJ/cj832tY2TeUGYAk7OEeiPoFCHPFHjlj4iIiChDOPkjIiIiyhBO/oiIiIgyhJk/IqJu4uTwPDfP4/5x987K/Ik8X2z33gPcbJOaCwxEzz6RvZJZPQAIQnsbTwnEBWH1zJ+vbCMziVreUB7Lyd0p+UN5LJnv044lt/GUSy3yXLRegLVwXzPKmEj2j0zu6Sizg9oYNzOXvF8nQ5eiH1+a3J3Mtcpzaz1WLfsVWcIU+5V5XrkPbUxX4pU/IiIiogzh5I+IiIgoQzj5IyIiIsoQTv6IiIiIMoQFH0REPYQWAo8rYkXoFmbIIhCtEWwsigl8Y+/HV0LrnmxUrBQk+KLgwxdNlLViDrmfIHDvU9Kx9SKR5EISuR9ZZKEVqLjFG8n7TVPMoZ1fEnmcNE1/ZeGDRhZZpGkwLItEALdgwmkmrRWSxMkFH/LYaRoiu+eiFZtU34+236RiDnU/JkUhScI2nYlX/oiIiIgyhJM/IiIiogzh5I+IiIgoQ5j5IyLqJm5WSPn3uC8byir7cfJxSsZLRIx8kVvylC7Ebu5OG6Oc0NbHSbVfLQ/nJ45xjlVD3tDJAGrdmBP2oe43RZ6vs5o6J0nT9FnmAtNk/tRjmerZQXW/shlzDRm6dBnFFPuVzZjV862ea9T2k3Rcbb/6feqcHCCv/BERERFlCCd/RERERBnCyR8RERFRhnDyR0RERJQhLPggIuoh9DC3/W90Iys3AHhGFA5E7hgprrGwwR2z7dcQ0hQ6aAUoyeey/eebZh+aNIUiSWo9dpI0hRqSVhzRGcdKU7CQrnF18n5SFbrUUJhRy7nUtN9OKu7Q8MofERERUYZw8kdERESUIZz8EREREWUIM39ERD1YuoxULXtOzgUS0acTr/wRERERZQgnf0REREQZkupn37a/u1epNHXpyRBRdrV9vsi/8/lpwc9RIupqaT9HU03+Nm3aBAD482P/vJ2nRURU3aZNm9CnT5/uPo1Ox89RItpRkj5HPZPin9lxHGP16tVobGyE53VNE0oiyjZjDDZt2oTBgwfDr6FxcE/Hz1Ei6mppP0dTTf6IiIiI6NPh0/fPayIiIiLqECd/RERERBnCyR8RERFRhnDyR0RERJQhnPwRERERZQgnf0REREQZwskfERERUYb8f4CuYtCDt2BYAAAAAElFTkSuQmCC", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -223,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "id": "253f9c3e-95c9-49ad-b2d4-4fa409aeb36f", "metadata": {}, "outputs": [], @@ -233,12 +221,12 @@ "# Initialize an Experiment with the local launcher\n", "# This will be the name of the output directory that holds\n", "# the output from our simulation and SmartSim\n", - "exp = Experiment(\"surrogate_training\", launcher=\"local\")" + "exp = Experiment(\"surrogate_training\", launcher=\"local\")\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 10, "id": "38c45f55-e7a4-4141-a445-85a6158eb12b", "metadata": {}, "outputs": [ @@ -246,18 +234,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "12:21:18 C02YR4ANLVCJ SmartSim[68607] INFO Working in previously created experiment\n", "Database started at address: ['127.0.0.1:6780']\n" ] } ], "source": [ - "# create an Orchestrator database reference, \n", + "# create an Orchestrator database reference,\n", "# generate its output directory, and launch it locally\n", "db = exp.create_database(port=6780, interface=\"lo\")\n", "exp.generate(db, overwrite=True)\n", "exp.start(db)\n", - "print(f\"Database started at address: {db.get_address()}\")" + "print(f\"Database started at address: {db.get_address()}\")\n" ] }, { @@ -281,18 +268,10 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "id": "537a1489-b4c3-4736-a628-b7af433a9cbf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12:21:38 C02YR4ANLVCJ SmartSim[68607] INFO Working in previously created experiment\n" - ] - } - ], + "outputs": [], "source": [ "# set simulation parameters we can pass as executable arguments\n", "# Number of simulations to run in each replica\n", @@ -308,11 +287,11 @@ "\n", "# Create the ensemble reference to our simulation and\n", "# 
attach needed files to be copied, configured, or symlinked into\n", - "# the ensemble directories at runtime. \n", + "# the ensemble directories at runtime.\n", "ensemble = exp.create_ensemble(\"fd_simulation\", run_settings=settings, replicas=2)\n", "ensemble.attach_generator_files(to_copy=[\"fd_sim.py\", \"steady_state.py\"])\n", "ensemble.enable_key_prefixing()\n", - "exp.generate(ensemble, overwrite=True)" + "exp.generate(ensemble, overwrite=True)\n" ] }, { @@ -376,26 +355,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "id": "5ce5c68d-38f3-40a5-a0c8-a7297036022f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12:21:38 C02YR4ANLVCJ SmartSim[68607] INFO Working in previously created experiment\n" - ] - } - ], + "outputs": [], "source": [ "nn_depth = 4\n", "epochs = 40\n", "\n", "ml_settings = exp.create_run_settings(\"python\",\n", - " exe_args=[\"tf_training.py\", \n", - " f\"--depth={nn_depth}\", \n", - " f\"--epochs={epochs}\", \n", + " exe_args=[\"tf_training.py\",\n", + " f\"--depth={nn_depth}\",\n", + " f\"--epochs={epochs}\",\n", " f\"--size={size}\"],\n", " env_vars={\"OMP_NUM_THREADS\": \"16\"})\n", "\n", @@ -403,7 +374,7 @@ "ml_model.attach_generator_files(to_copy=[\"tf_training.py\", \"tf_model.py\"])\n", "for sim in ensemble.entities:\n", " ml_model.register_incoming_entity(sim)\n", - "exp.generate(ml_model, overwrite=True)" + "exp.generate(ml_model, overwrite=True)\n" ] }, { @@ -421,12 +392,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "id": "8a2b2061-5de8-4039-a431-c895e0a8940b", "metadata": {}, "outputs": [], "source": [ - "exp.start(ensemble, ml_model, block=False, summary=False)" + "exp.start(ensemble, ml_model, block=False, summary=False)\n" ] }, { @@ -447,136 +418,139 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 14, "id": "eb96f840-0a52-47d4-b5e4-3f2f2a3a2ebf", "metadata": {}, "outputs": [ { 
"data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Default@20-19-36:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n", + "Default@20-19-37:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "20:20:52 HPE-C02YR4ANLVCJ SmartSim[15001:JobManager] INFO fd_simulation_0(18881): SmartSimStatus.STATUS_COMPLETED\n", + "20:22:06 HPE-C02YR4ANLVCJ SmartSim[15001:JobManager] INFO fd_simulation_1(18882): SmartSimStatus.STATUS_COMPLETED\n", + "20:23:28 HPE-C02YR4ANLVCJ SmartSim[15001:JobManager] INFO tf_training(18887): SmartSimStatus.STATUS_COMPLETED\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Default@20-20-52:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ - "12:25:30 C02YR4ANLVCJ SmartSim[68607] INFO fd_simulation_0(68661): Completed\n", - "12:25:30 C02YR4ANLVCJ SmartSim[68607] INFO fd_simulation_1(68662): Completed\n" + "Default@20-22-07:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAA4AAAAOCAYAAAAfSC3RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAGUlEQVR4nGP8//8/AzmAiSxdoxpHNQ4hjQB59QMZfQJbWQAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAAIklEQVR4nGP8////fwYqAiZqGjZq4KiBowaOGjhq4FAyEACzFQQkwb2h5QAAAABJRU5ErkJggg==", "text/plain": [ - "
" + "
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -598,7 +572,7 @@ " u_steady_name = ensemble.entities[0].name + \".{sim_data_\" + str(sample_idx)+ \"}.u_steady\"\n", " client.poll_key(u_steady_name, 300, 1000)\n", " samples.append(client.get_tensor(u_steady_name).squeeze())\n", - " \n", + "\n", "pcolor_list(samples, \"Simulation\")\n", "\n", "for i in range(0, epochs//10):\n", @@ -615,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "id": "7bbce88c-6f63-407a-8912-5787139f015b", "metadata": { "tags": [] @@ -623,58 +597,51 @@ "outputs": [], "source": [ "# Optionally clear the database\n", - "client.flush_db(db.get_address())" + "client.flush_db(db.get_address())\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "id": "7d9f2669-4efb-4f38-97e9-869a070ab79c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "12:26:24 C02YR4ANLVCJ SmartSim[68607] INFO tf_training(68664): Completed\n", - "12:26:28 C02YR4ANLVCJ SmartSim[68607] INFO Stopping model orchestrator_0 with job name orchestrator_0-CR7RNSKODOYG\n" - ] - } - ], + "outputs": [], "source": [ "# Use the Experiment API to wait until the model\n", "# is finished and then terminate the database and\n", "# release it's resources\n", "while not all([exp.finished(ensemble), exp.finished(ml_model)]):\n", " time.sleep(5)\n", - " \n", - "exp.stop(db)" + "\n", + "exp.stop(db)\n" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "id": "2bca8a25-6e1b-4540-9d1e-932eb52d7b1e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['Completed', 'Completed', 'Completed']" + "[,\n", + " ,\n", + " ]" ] }, - "execution_count": 10, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "exp.get_status(ensemble, ml_model)" + "exp.get_status(ensemble, ml_model)\n" ] }, { "cell_type": "code", - 
"execution_count": 11, + "execution_count": 18, "id": "50b42065-6356-4a5a-b742-daca17b8bd6e", "metadata": {}, "outputs": [ @@ -683,35 +650,43 @@ "text/html": [ "\n", "\n", - "\n", + "\n", "\n", "\n", - "\n", - "\n", - "\n", - "\n", + "\n", + "\n", + "\n", + "\n", "\n", "
Name Entity-Type JobID RunID Time Status Returncode
Name Entity-Type JobID RunID Time Status Returncode
0 fd_simulation_0Model 68661 0 231.9948Completed0
1 fd_simulation_1Model 68662 0 231.7866Completed0
2 tf_training Model 68664 0 285.1160Completed0
3 orchestrator_0 DBNode 68629 0 309.7907Cancelled-9
0 fd_simulation_0Model 18881 0 309.6291SmartSimStatus.STATUS_COMPLETED0
1 fd_simulation_1Model 18882 0 384.0497SmartSimStatus.STATUS_COMPLETED0
2 tf_training Model 18887 0 464.0114SmartSimStatus.STATUS_COMPLETED0
3 orchestrator_0 DBNode 18822 0 476.6033SmartSimStatus.STATUS_CANCELLED0
" ], "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 fd_simulation_0Model 68661 0 231.9948Completed0
1 fd_simulation_1Model 68662 0 231.7866Completed0
2 tf_training Model 68664 0 285.1160Completed0
3 orchestrator_0 DBNode 68629 0 309.7907Cancelled-9
'" + "'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 fd_simulation_0Model 18881 0 309.6291SmartSimStatus.STATUS_COMPLETED0
1 fd_simulation_1Model 18882 0 384.0497SmartSimStatus.STATUS_COMPLETED0
2 tf_training Model 18887 0 464.0114SmartSimStatus.STATUS_COMPLETED0
3 orchestrator_0 DBNode 18822 0 476.6033SmartSimStatus.STATUS_CANCELLED0
'" ] }, - "execution_count": 11, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "exp.summary(format=\"html\")" + "exp.summary(style=\"html\")\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d11562b1", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "smartsim", + "display_name": "ss-py3.10", "language": "python", - "name": "smartsim" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -723,7 +698,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/doc/tutorials/online_analysis/lattice/online_analysis.ipynb b/doc/tutorials/online_analysis/lattice/online_analysis.ipynb index 3389b1190..c5f58fa97 100644 --- a/doc/tutorials/online_analysis/lattice/online_analysis.ipynb +++ b/doc/tutorials/online_analysis/lattice/online_analysis.ipynb @@ -90,7 +90,7 @@ "\n", "from smartredis import Client\n", "from smartsim import Experiment\n", - "from vishelpers import plot_lattice_vorticity, plot_lattice_norm, plot_lattice_probes" + "from vishelpers import plot_lattice_vorticity, plot_lattice_norm, plot_lattice_probes\n" ] }, { @@ -121,7 +121,7 @@ "# Initialize an Experiment with the local launcher\n", "# This will be the name of the output directory that holds\n", "# the output from our simulation and SmartSim\n", - "exp = Experiment(\"finite_volume_simulation\", launcher=\"local\")" + "exp = Experiment(\"finite_volume_simulation\", launcher=\"local\")\n" ] }, { @@ -144,7 +144,7 @@ "db = exp.create_database(port=6780, interface=\"lo\")\n", "exp.generate(db, overwrite=True)\n", "exp.start(db)\n", - "print(f\"Database started at address: {db.get_address()}\")" + "print(f\"Database started at address: {db.get_address()}\")\n" ] }, { @@ -188,7 +188,7 @@ "# the Model directory at runtime.\n", "model = exp.create_model(\"fv_simulation\", 
settings)\n", "model.attach_generator_files(to_copy=\"fv_sim.py\")\n", - "exp.generate(model, overwrite=True)" + "exp.generate(model, overwrite=True)\n" ] }, { @@ -201,11 +201,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "19:49:59 C02YR4ANLVCJ SmartSim[54122] INFO \n", + "20:36:32 HPE-C02YR4ANLVCJ SmartSim[25938:MainThread] INFO \n", "\n", "=== Launch Summary ===\n", "Experiment: finite_volume_simulation\n", - "Experiment Path: /Users/arigazzi/Documents/DeepLearning/smartsim-dev/SmartSim/tutorials/online_analysis/lattice/finite_volume_simulation\n", + "Experiment Path: /home/craylabs/tutorials/online_analysis/lattice/finite_volume_simulation\n", "Launcher: local\n", "Models: 1\n", "Database Status: active\n", @@ -253,13 +253,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "SmartRedis Library@19-49-59:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n", - "SmartRedis Library@19-49-59:WARNING: Environment variable SR_LOG_LEVEL is not set. Defaulting to INFO\n" + "SmartRedis Library@20-36-32:WARNING: Environment variable SR_LOG_FILE is not set. Defaulting to stdout\n", + "SmartRedis Library@20-36-32:WARNING: Environment variable SR_LOG_LEVEL is not set. Defaulting to INFO\n" ] }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -269,7 +269,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -279,7 +279,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -289,7 +289,7 @@ }, { "data": { - "image/png": "", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvgAAADpCAYAAABRN0P0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAAxOAAAMTgF/d4wjAABywklEQVR4nO2de5AkR3Xuz/T0zL5md0er3WXR20g8LN7I2Fe2dCWHgMBYwmDg4rB4GfGQhGwcVvDGEkGAjQMUgA1hMLZ0QSBHXHBAgADbEoaQHCCMTAC2DAjtSqxeK2m1mn3Ne6buH7U5nX3mZOY5mVnVVT3nFzHRXZlZmVnVPV1ffnUya6QoigIURVEURVEURRkKOoPugKIoiqIoiqIo+VCBryiKoiiKoihDhAp8RVEURVEURRkiVOAriqIoiqIoyhChAl9RFEVRFEVRhggV+IqiKIqiKIoyRKjAVxRFURRFUZQhQgW+oiit5gtf+AI8+clPZpW99dZbYWJiApaWliruVT/ve9/74Jxzzqm1zVQuvfRSeMMb3jDobiiKoigRqMBXFCUrL33pS+ElL3kJmffOd74TzjzzzOi6zz//fHjve9/bl3bxxRfDz3/+c9b+5557Lhw5cgRGR0cBoLnCO1e/Xve618GrXvWqYLmRkRG4+eab+9I+9alPwd///d8n9yEWbt9T+f73vw8XXXQR7Nq1C7Zs2QJPf/rT4brrrusr861vfQsuuOACOP7442FkZATuuuuuVfXs3bsXLrzwQti8eTNs374drrjiCpifn+8r88lPfhJOO+002LhxIzznOc+BW265pdJjUxRl7aICX1GUrFx22WVw4403wn333deXPj8/D9deey1cdtll4jqxUFKUXDz66KPwspe9DH7yk5/AwYMH4a//+q/hrW99K3zlK19ZKbNp0yZ4zWteA5/73OfIOpaXl+HCCy+Ebdu2wf333w//+Z//Cbfccgu87W1vWynzxS9+Ed797nfDZz/7WZiamoJLLrkEXvSiF8G9995b9SEqirIWKRRFUTKyvLxcPPGJTyyuuuqqvvQvfOELxaZNm4qDBw8WMzMzxdvf/vbitNNOKyYnJ4tzzjmnuO2221bKXnfddcWJJ55YfOITnyhOPfXUYmJionjzm99cdDqdYmxsrNi0aVOxadOmvrKGhYWF4pprril+9Vd/tZiYmChOPPHE4kMf+lBRFEXx7W9/uwCAYmFhofj85z9fjI2NFZ1OZ6W+W265pTjnnHOK973vfX19/+IXv1js3LmzmJubI48ZAIprrrmm+LVf+7Vi06ZNxXOf+9ziBz/4wUr+1VdfXfzWb/3WyvZjjz1WvPGNbyxOPPHE4vjjjy9e+MIXFj/72c+Koiic/aL48z//8+KJT3xiMTExUZx00knFFVdcURw9erQoiqL44Ac/WHS73aLb7a7U88tf/nJVHWeeeWYBAMX69euLTZs2FS984QuLoiiK1772tcXFF1+8Uu7UU08trr766uIFL3hBsWnTpuKMM84ovvWtbxXf/va3i6c//enFxMREccEFFxQPPvjgyj4zMzPFu971ruIJT3hCMTk5WZx77rnFD3/4w5X8f/u3fyvOOuusYsuWLcW2bduK3/zN3ywOHDjg7fttt91WnHfeecW2bduKU045pXjve99bLCwssD8LDr/3e79X/Mmf/Mmq9LvvvrsAgOIXv/hFX/p3vvOdotvtFo888shK2le+8pVi48aNxczMTFEURXH++ecXf/qnf9q337Oe9azi/e9/v6hviqIoHFTgK4qSnWuuuaY44YQT+oTXueeeW7zpTW8qiqIorrjiiuJpT3ta8Ytf/KKYm5srPvKRjxQTExPFvffeWxRFKdpHR0eLN77xjcXhw4dXROt5551XvOc97+lrCwv897znPcXpp59efP/73y+Wlp
aKRx99tPjud79bFEW/wC+K1cK7KMqByMknn1wsLS2tpD3vec8r3vGOdziPFwCK008/vbjjjjuK2dnZ4uqrry62b99eTE1Nke1ceOGFxfnnn188+OCDxdGjR4u3vvWtxUknnVQcPnzY2S+Kz33uc8Uvf/nLYnl5ufjv//7v4vTTTy/e+c53ruRjke7r/0033dSXRgn8U045pfjRj35ULC4uFn/2Z39W7Nq1q3jpS19aPPzww8WhQ4eKs88+e+UzNnVccMEFxb333lssLCwUf/M3f1Ps2LGjeOyxx4qiKIoTTjihuPbaa4vl5eVibm6u+O53v1scOXLE2fef/exnxaZNm4p//Md/LBYWFop77rmneMYznlF84AMf6DsW32cR4uDBg8WuXbuK6667blWeS+B/7GMfK570pCf1pd1///0FABQ//vGPi6IoisnJyeKGG27oK/PGN76xeOlLX8rql6IoigQN0VEUJTt/9Ed/BI899hh89atfBQCAO+64A2699Va4/PLLYXl5Gf7hH/4BPvCBD8AZZ5wB4+PjcOWVV8ITnvAE+PznP99Xz8c+9jGYmJiAjRs3stotigI+/vGPw4c+9CH49V//deh0OrBt2zY4++yz2X1/+ctfDrOzs/DNb34TAAB2794N3/72t+FNb3qTd7+3vvWtcOaZZ8K6devgqquugm63u3L8Ng8++CDceOON8LGPfQx27doFGzduhA9/+MMwMzMDN954I7ufAACvfvWr4ZRTToGRkRF46lOfCm95y1vgX//1X0V1SHjDG94Az3zmM2F0dBRe85rXwL59++Btb3sb7NixAzZv3gwve9nL4D/+4z8AoAx9+exnPwuf/OQn4aSTToJutwtXXHEFbN26deU4x8fHYffu3fDAAw/A+Pg4nH322bBp0yZn+5/85Cfhoosugj/4gz+AbrcLp556Krz97W9fFTPP/Sww8/Pz8MpXvhKe8pSniOL/Dx06BJOTk31pxx133Eqer4zJVxRFyYkKfEVRsnPcccfBK1/5SvjUpz4FAOWEzbPPPhue+cxnwv79+2FmZgZOP/30vn3OOOMM2Lt378r2zp072cLesH//fjhy5Ah7VR2K8fFxuOSSS+DTn/40AAD83d/9HVxwwQXwhCc8wbvfr/zKr6y873Q6cOqpp5Lx1SbNPv6xsTE49dRT+46fw6c//Wl4znOeA8cffzxs3boV3vOe98DDDz8sqkPC4x//+JX3RojjtMOHDwMArExE/Y3f+A2YnJxc+bv//vtX5md89atfhT179sBZZ50FZ5xxBlx99dWwuLjobP8Xv/gFfPnLX+6r77LLLoN9+/b1leN+FjbT09Pw4he/GObm5uBrX/sadLtdzikBAIAtW7bA1NRUX9pjjz22kucrY/IVRVFyogJfUZRKuPzyy+Hmm2+GH//4x3D99dfD5ZdfDgAA27dvh/Xr18Pu3bv7yu/evRtOOeWUle1OZ/XPE5Vms337dpiYmIA777yT1UdXfW9+85vhX/7lX2DPnj1w3XXXwaWXXhqs65577ll5v7y8DHv37oWTTjppVbmTTz4ZAKDv+BcXF2Hv3r0rxx86TgCA733ve3DFFVfANddcA/v27YODBw/CBz/4QSiKInh8mJGREVY5Cbt27QIAgJ/85CcwNTW18jc9PQ3vfOc7AQDg6U9/Otxwww2wb98++NKXvgSf+tSnVtx4qu+7du2CP/zDP+yr79ChQ3DkyJG+ctzPwvDYY4/B8573POh2u/CNb3wDJiYmRMf6rGc9C+6++2549NFHV9Juv/122LhxIzzpSU9aKfODH/ygb7/bb78dnv3sZ4vaUhRF4aACX1GUSnjuc58LZ511Fvz+7/8+jI+Pwyte8QoAKIXb61//erjqqqtgz549MD8/Dx/96Efhrrvugosvvthb565du7zifWRkBP74j/8Y3vWud8Htt98ORVHAgQMH4Hvf+56zvr1798Ls7Gxf+mmnnQbPf/7z4RWveAWMjY
3BRRddFDzej3/84/DTn/4U5ufn4YMf/CDMz8/Di1/84lXlHv/4x8OLXvQiuPLKK+Ghhx6CmZkZeMc73gHj4+Pwu7/7u95+2Rw8eBBGR0dhx44dMDY2Bj/84Q/hE5/4xKrj2717d3Dd/127drGXGuVy6qmnwkte8hJ4y1veAr/85S8BAODw4cPwzW9+Ex588EGYn5+H6667Dh555BEAANi6dSuMjo6uOOdU3y+//HL40pe+BF/84hdhfn4elpaW4K677oJ//ud/7mub+1kAAOzbtw/OO+88OPnkk+HLX/4yrF+/flWZ5eVlmJ2dhbm5OQAoQ3lmZ2dX+nbuuefCU57yFLjyyivh8OHDsHfvXrjqqqvgkksuWanv8ssvh2uvvRZuvfVWmJ+fh7/927+FO++8E173utclnGVFURQaFfiKolTG5ZdfDnv27IHXv/71sG7dupX0j3zkI/CCF7wAfvu3fxt27twJ//RP/wQ33XTTirvt4sorr4Sf//zncNxxx62KZza8//3vhze84Q1w8cUXw+bNm+EZz3gG3HrrrWTZV77ylfDkJz8ZTjjhBJicnIR///d/X8m77LLL4Ic//CFccsklrHCNyy67DF796lfDtm3b4Ktf/Sp84xvfcPbx+uuvh9NOOw2e85znwEknnQR33HEH3HzzzbB58+ZgvwwveMEL4NJLL4Xzzz8ftm7dCu9+97vhta99bV8ZM29g+/btMDk56QwB+su//Ev4q7/6K5icnIQLL7wweKxcbrjhBjjrrLPg+c9/PmzevBme/OQnw2c+85mVuwxf+tKX4KlPfSps2rQJzjvvPHjd6163cgxU35/73OfCTTfdBJ/5zGfgxBNPhOOPPx5e/vKXrwwgDJLP4tOf/jT813/9F9x4441w3HHHwcTEBExMTMDv/M7vrJS55ZZbYMOGDfCUpzwFAACe+tSnwoYNG+D6668HgHLQ+rWvfQ0eeeQRePzjHw/Pfvaz4ZxzzoEPf/jDK3W84hWvgA984APwqle9CrZu3Qqf+cxn4Otf/3rwO68oihLDSGHfz1UURVEAAOCnP/0pPO1pT4O77767L3SIYmRkBG666SZ43vOeV1PvFBf6WSiKoqiDryiKsor5+Xn4i7/4C3j5y18eFPeKoiiK0jRU4CuKolh8/etfh+OOOw7uuOMOuOaaawbdHUVRFEURoyE6iqIoiqIoijJEqIOvKIqiKIqiKEOECnxFURRFURRFGSJU4CuKoiiKoijKEKECX1EURVEURVGGCBX4iqIoiqIoijJEqMBXFEVRFEVRlCFCBb6iKIqiKIqiDBEq8BVFURRFURRliFCBryiKoiiKoihDhAp8RVEURVEURRkiVOAriqIoiqIoyhChAl9RFEVRFEVRhggV+IqiKIqiKIoyRKjAVxRFURRFUZQhQgW+oiiKoiiKogwRKvAVRVEURVEUZYhQga8oiqIoiqIoQ0Q3VGDdunWwY8eOOvqiKIqiKIqiKEqARx55BObm5pz5QYG/Y8cOuO/ee6GAERiBYiXdt13ACAAAue3Lc5WltocZ+7wOI2vlc6wb6v9LUZJYXh50D5pJR29+KwqFXt/T8GlfrJNPPvkkb11BgW83wt2OzZOWxXAGHW0gpa9VCrs2ncO1CPX54P8JpWXUIbCrbqPOQUIO4c2pI3RMOgBQ1ihYgyn54RrebIGfUyiEhHjsnQI73+StBYFT1/G1cbCkKI2iLWK6yn6m1B0Szpy6pXXEiHW7DhX7yhplLUVe5IJzvrjnlC3wTaUpYTa+Dtrbvrwc222jaQMUX3/afq7bDPX/1rTvzlBRhQhOrTNm/7r2yYGrXa6I7nTk7jtVXiLafe2p+FeGHHX008DaWHJNFwl8gyQ+Hg8IcsXgr6WRYeg4zbmoQsytlXOsKCRNcMSl+0rKV1W2aRghzRHrHPc9l0OfOmBRFEVx0OoYfLxNhfOYcsMsVO1wJEVRhDTdNeeW5ZTLVSambA5CglsijF2i366DE6
6TI6QnVOewogMZRWEjNXFFMfh2A6mr6OBtqWPvEu3cNEVpO9QdMQ3JcRArmAbphucoU3V+ankfMWLaJfSXl3likgrhcQl+jrvvK6dU/31RGsWwm61VIz13LIHvimkPTWiVxMavtbj6nOQO0dFzm4+qV3XSH0wHVQnTOl3yFPFdRZ6kTA647nmMcKe27TYpUc8dTFB9xO0o+dHBQivQa1Z9sAS+yz13uYZU6AyuJ9Xt55ZdC2iITrNwiXhqACb5rrrunq1p1z6na16VoK5q35g6qxT+knIUXKEeSrPFuy3csWiXhvO4hLrEsdcJt+2gbSFSLfvurDWNNihEk2xjRQXeLyTG7YEDHkhwJvNK8hSlKnz/I77/CSVALrHZFIGcO6/qdG6+lFjHXirWTd32IIAaEEi2cX9ceT5yLPGprE1aeJdIr3vVIwrRwSE5AH5nnVMnta3LZCptIoeDHjtgHmpShXyO9BxlY/oSs09q/2P7w83n4BMnlFPPfU8Jepe772vfF+aD265ihZy2OcuYlojPVhM7wFSGDvEymT6xrEJaUZRkUuPAuUK3jrRQXi6hPSjRz8mX4HMipWE2LgHuSguF9nDa54irFrqt2Qh9V9ba+aiSmDtbNaMufrWIBT4VBxxyFLkr7OBtSQy+b19FycmacdDrIFbMx6albqemhfLqvhshrXdpiU43FBn+N0Ycv92jo733LpfeldbprHbuDba4TxVEkvX2XeWawKCEYez5aLiQHRgq8tc0UQ+6isG3rKadllK/oigtIefEUolIr0vwx6TnrCtV4FNCPiTec4tVLEwWF93i37SPRT525l3tYJHvC+ex+0a5+lTf7TxXfpOQfJZNOJbQ57uWUZG/ZkkK0ZF+ILni7jXmXqkTdewTqcql5wr31H0k/fOlV5EnSeeK9hzhPtx8Cs5KOeZYTLoR/sblx849TrPzKHHPDf3hhgHhYxikc59b7KUcSx3CU9q/hovhKFTkr0lqc/AVpU2oqM9ALpeeI7ylZbj7+PqaW+DH7pvqwtcdhhSCcsE5ohsgHEJkl6dcepeTj/dLWTcf97lumhQGww1lqpNhvRvQApGv5CVLDH6IKp56K9lXUbiosE+gakFPveemcd9ztmPTc5ZxCVlXGE2sYE8R+lyxHYIbe2+79eY8jIz0u/o4tMa8xjj5Ppru4gNUs4xnahuSdgYhVqvoW5OPt2ZyPwhyrZMcosMV0zmXutQQHSUXKugT4F6YpEIyRciH0nxtcbZD6dIy3HI+sZwSZhMj8jli3hevzzleW8wsLpavIyO9dkZHaSGORXmnI3f1ua92P13b+FhCx16niKsjdMXVRo7jbKL7b6hi8Ob6TknraMo58qAmbR6SQ3T0Q1Caiv5IZCZVsErDa3A+Jy9V4Ev670J6cecKUIk770pPFfghIc8dIPnyXaEx9ntb9Nti3+XKG6G/tFSWd02CdYmf0ERdqoxvRR18THYfBkVI+OV0mqsKg2nSAKpKcoj9hqPX73Q0Bl9pNFSoloF6ArLvB0HdegYxAiPWLU4R8Jx9fe9d/Y4RpFwkISuxK9akfhbUtk/Up9TrghL3tkNuC3Ej9o27b4t37Ox3u/Tx+KBEO+5LqByuz3WcdeATg5L+xNaTIv6rcv1z1T0oWuLKx6AiP41sMfgxcfW58MXrK+3HfJ4cga7fBQY5hEWsa5xDxKeIfF/fQ3kUueLMOevHS/stdedxnn1sLlEvqS/2e4eFOhUyY0+ydbn0i4v9bj4mtDoOt6zLyTf9z0WV8e2h+mOFcmjQU9W+g6y7DiQiv2UDAo3LjyfrMpmDio3XGPz2ExLvVD7XkR8a575uxy/GbeUIPslrTsfeleYjl4Cn4D4UalBuvSv8JsfdEsl5NSE4AOHJsFjA4wGATVH0r6ufQ/hwQnly4RpEpBIKLXKV5+wjCUsaZKiPr+6miuOWCXcpatbJ0RAdpXZ8YTe56m8dg7hdH9N2jKjMKei5Qt/XV0OV4t1Hqr
B35Ukde7wtceylIl8yCRcjmSRr2jUi36ygg4/bXm3HXlPf5eRSgwq8jzQGH5dLIfb3I4fgdp3bUFmp25+66k9Vbn+TRLWKfMVCBb5SO1WK+4GSW6TXKfq5bYVEpFTY53btbQYl4H00LRQn1rEPfRacwYKvv5RIwZNrAeiYe3tf49LbS22asiYmP7TSju+OgQ9b+HPKxZI6KTWXIOQMaLhuv3QC8iBj+6v4nR5ikZ6Kbx6e0o8KfGVgtFbcp/ygD+L2fRV1VOHYp7j1AM0U9FJSxX1Vjr393vd5cIV96PtJCX5bMNvhNRzhhr8bWKgbJ98VDkT1j+Pe+/qHy8Ui+V/nCmdOOVyGM0jjOvgSpz+mfK59qyLlLsGQu/g2Gp/vRwW+UilDK+I5F9UYkV2nay9tm+sKpwj5kJjPLeKxsytB2peYteFzOPZ4OyTCYwS+qdPUJ7nbwhGYlGixV9DxYU/ApdrmOPmUe+96bx9T1e69TU4hnCoSfXMRQk5/aODgKhdbPte+VcL9PknqG/QxZURDd2hU4CuV0DphL3VOU/NylK+KGOfeJQ45Ir5OYZ8i6Km6UtewB8gr7kNCHwtxXC6HY++rw9UvDCVEscg2r7b77qrfJRopJ9++MxCa1IvfU33OSczKNq79QwMriWPvO7++fvryuYMj6So8KasPDVoUc4X5kAl4Dhq6sxoV+Ep2XMulNoYqXFFXHb70psI9Do4YlIh8Q6qYzyniOW1JnzILkCbqY9163J+QEM/p2EsFvgTKoaccZOzQ+ybVhr6DlLCXiONYuOeN425LxbVLNMasHOQbAOVw+KUDIekgIXb/HKxB8S5FQ3dKVOAra4MU55PzntNGm5GcH6nYN6QI+5CoT3HtfEjFfV3CHvctdslLql+uuwCSeiWEBI3JxzH51D54gEOJdLs+Oy7ft3oOdWzcOPxccNrjDkJcA4HQXQquk89x+EODlKbE8OP9U+qRtKciP8haD91Rga9kAzv1A3fuYx1nbhmp2PelU3B+wLk/8rEXA+7ASPrqE58YKsaaEvS5L3ghZ5IS91xRHyPoOWWkk1wlA1mOa58yGMZgYekKkzFQoTq4LY47j9s35am67fq5seOpSGLPpcJaks7Fd6dE6uBLQoxc5aTzInL+dvqImRysIj/IWg7dUYGvZGHgYt7AFTIhd1niPscIMwrXj3Wu9FCej5TBksul9wn7FFEvXXFCmhcr7OsQ9bgvnM/N9d43GKta3JtyqQJGWgd29U2Mv/k+ugYHMWEqMUjakfbJFdYEwHfuJQ5/29z9qsV06l0ERUGowFeyU3vsfUjUUyLdrKdt0nzpeH+Oo+nqG4YjULHYtYUuLhvadqVJSBGGHOzjxaLe7nuOi6BLBFFpoYc1xYjz3II+tL/0PRWO4xP03LZz4RPwVJ5Jcy2xGXKVDfgzMNtVz//gutZ2WUl8ecz/lGtgEDMA4KSH8jj5VLm6nH0OdQ0q1hhrLTZfBb6SnYEIe58T7/szgn5xsT99aakUNdQ+pm7pw3xsfCLeiARKzHLTJIMCV5oLn2jzxWf7cIn6kKBPnRyXW0xz6owR+SmiHm9LhL3dFtehD6Vz4QzmYt15XzgPx/l2tWt+TwDCy3dK72C5yDW4oEQ5R6T7ylTh7Pvycjj7sb8pVYjx0KArZVC2hlkrQl8FviICi/cCRuoNz3GJoZCwNwLefgUAmJ8vL5BLS6sFP0fkA8THk1NClnrFafZTPKl9qDS8egjVH6pfGE54ilQcuY7H914KJdp82xJBXYdLH+qDdFs6SEu5C0Btx0J91w0+UewSQq4HYOH3g4bz3U+ZpB5zrJwBQYyLHxoA2HmSQQAnn1smtJ9N3WFmCpthn4SrAl8Rgf8ZGuHWh8T8/Hxv27x3veJ9saOP26f6Z+MKmbHFui1sKWFvHsRjC2FXWfs1NBBwieeFhbgLSsjRogYWnEEKVZekH1yhCyB/umuObdwubjulXu6xS5fT9L2n4ApI310ajo
vPcbGNYDKx9fgJtpz+St3eVKi+cO9qcB4GJhWRMeK+CrHPGQRQeXY+x9UPlfNRlfBXsjDMbv5QCXw8Ghv20dmaxo6Vp9x5I9qNqF9cLP/m5soLui3sbeEfK/BdcNx3SqwbgW7EPED/EzftMqY+l9A34o0S/VQf8Xtfmg+XaMB9ju1DjLvKEbgxwl4qvmMEPTctpS8xE3S5+ERz6LP23Z0CoAWsS9SZfrhEPnbxzX5U3U1x+H2Y47TB3z97gIOPjdp2iXfOvtR2apovPSWPKscpy0EySFQXX4lgqAQ+FvMq7vNBjXJj3XtRWI8tsu1tW4AbcT493S/ozfbsbC99Zqa8uOGypg4s6CkxT10cMS6nnnLlbXHe6fTyRkdX59n51L6++k1/KFHtGoDY5TjbLsGO71S4+kLV74ISjJQIlYj6GLc6VczHCvmUtJwhSBQuMc/5bDl3cUKuvgRb5GNCYrJOuIKWI6bxPvaxu5YDdfXJN2hrk6vvGsRRSMqGqPtukEIyjMtpDpXAV+ojJTRHLO7Ne/Nq3Hjs1BvRPjNThpkcPdoT8/Pz5d/sbPlqi397wLCwULZji3u7fRuf0KWcdVvkGgHe7a525MfG+vNw2cXFXvmlpdVuv33h7HZXlzF9pe4amG3XbX3XRdrnwFKDDKoPvnNr8Amr0Odl0iRufayD30RRz+mXa78YfOKPKutLk4h7jhOLXWfXUpjm/2XQTr1vwBS6u8Bx4M33AJ8Dl7Pva4OT59q2y1NlfGl4X1+6qx5JPtWOqy1uHbn6oyQxLNEfQyXwQyE6w/KhDYLQeeO48iznHjv19p8R9bagn58HOHy4FOaHDpXpR470RPzsbLlt/o4eLdMWFnqi3m6XgnKjAdxhLragNWWMaMeuvC3wjbjH6WYfPBjAdQH0DwaoV+z244GBXca8ty/+xt1zCXJqMEMNIDjCXirmQ2m5HtTkE/ODEPK+9KofxhVCKkh83wfqvW9JVVvIY0HvEru4HXtVnLqxj81119DnbHOFtY3P2XchcfFD9Zj+UduhNGpfbjqVh/NdZUL7SPfNIeJ9deggYc0wVALfR2MexDQEULeyOOc3i7g3DrwR8DMzpcCfny8F/uxs+TozU4r56ely+8iRXr4ZIFBOncEWoa44dyqe3ohu45obUW4cwuXl/lvg5gfdLtftlummLoCeKLPFsqnL7B/C5ZKaVYTMIMTO49w+dol7StRTwt4nNriuvE+sS9dy5+Tb9eK6qbKhbV96DlEPIHPrY4R8isstcfBjwM6yS0zaaYPC9IX6vDh3GXyOfSgPl6PubNhzFVztxgw2fH2kyofS8L6uspy8UL0cuOJdnfpG0WZjeKgEvi8GX/oBtflDrRJKpFNp4uUzbTEPsHoFnNnZUqxPTZXvzfbsbJlm8o4eBbj//lL0m9dHH+0J+76OjwCsW9f/OjbW74xTLjkWsOPjPZFt/nwOPUD/QGB0tBd2Y96bepaWeuEyxt3Hbn+n09vflLW37VdXXL99QbGFPr7QuPbFx0fdMbDr811kfencV198Pdfxp9x/uz5Xn2PEfYzQlgh5Tn0hYRvKl7qjnDpwmqsee4AeEvMUMU5tqE4uvvPm6gu+y4DdflcMfYx4tMu75iq4BgrSPNNHzravjCE0gIv9zub4vlTt1CtJ2PqlrXpwqAR+Cm39AJuKKM7evLocexOOMz1dOvH4/cMPl+8feqgU+Hv3lmL/nnv8bRdFWT9Azwk3F0c7VMaOYQfoCfPx8f76ut2yDCWkqUmm+D0+L/bqN/a5Mk4/JRxDTr45RpxmzkdoOT3KkccDGWrgEBL2FFJR73LsYwS9b6DgK8/d9qVLnXmqb5z6cuW7SBGS0jqwqI1x6VNc/Jg7HqF9OYLWpKXE0EveY0cf34nE76V5rn5S+dwyVLlQuq+u1LJV04Q+DDFt0oqNF/i+OHq8sou9jcNIXILT9UG15QOsE05oTvQKOdi5N/H1xrE/dGh1uM3+/eXrnj2lS7
97d7kdg3H35+ZKcW9ceVuoG/AAYHS0vAOwYUMvHbv8rommOF4f3y1YObGWWLXPFW7HOPVmH5NvhL+5O2AGLaZOu08GSsBjIe9KxwMBHz6RiweArjyAsLDnOPh2PVRdVPlYh55KixXxrvpiykjK5cD1/ZAKFfxEV+pzkbiyVRIS9XYZn6AN1ZHy8Cu7PmqQ4XLzfftLHftYUc9N86XjfF+ZlLLc77k6/oqQxgt8adiNSTNC07W/K9REhT0NNzSHjS3Y7G3j2B850hP2s7MABw70wnAefrh06++/v3y/Z0+8sDdhJevX94R6t9sL1TGuPJ5Q2+kATEyU5TZvLgcE69e7BwVmP9w2wOrJqKHBgSkreZy9D1e7LhFvnxNqpR+7TurYQ242V9THCntXWm5hnyrqh1XQp4DFHXauAfzL2Db1OCVuPUcgxzru1HuXEKXOvUF6Z4DadtVHHbNrf26aq86YMjFllVbQtrmcjRf4EmJj8HGsVa7224pvoONz7ql0Epdzbwt749wbYb9/f5m3f38p6O+8s/yLYcMGgMnJUsxPTJSidHy8J/CpWHzqR9oI/MnJ1YMBs485Pnv5TXwubGzh7lrSEmD1oIOL2Z8S8ibNiHZ79R47zY799zn39qt9rPZ5CYl6X7kcjn1oyczcAl8q5teiiDeEhBhAv5B1YVbC4QyIQ4OqFKQDcnz82Em38bnkeBUsgNWhO7731EDD5AOsfo6AaxDhq9M1AMB59jZ1jiTCH5fj5EnKxJSVIK1PBxpJuGLym2wMD5XAx/jCeewyPuyQk6Z+iLmJPc6oVXLskBwj7I8eLV9nZkqnfmqqDL85cADggQdK5/7BBwN98TAzU/5loPjrvy6dewMWueY45+fLC+DCwuoBDrXKC4B/CUCXkLaFtmvZTVuwmxCj0VH3ZGG7jC8khxL1FDHiHpelxH2sY1+1sOcuVUnty82LKVfV/gAyMcERW6662zRocZ0TKtSFWhnHJ3g5731CP8bBl94pwPviY+GIfs62q4zrGKj0UJ6kTA649auIr5WmO/qNF/icmPvQ/r5tDCXo14qwd+E7h+xzQ4kv+4FVZsKsEfiPPlpuP/QQwL59AD/6UfkeCftBfzIjf/InZHrxne+Ub4zwpJ6ga86BWabSFqlcsMNntl0CnBLuY2P96UboY8FvC3sqJAf3xxASrRKBH+vapwp7ieC32zBIBX1O5z62fAout1eyj2u/Nol6g8t9t4W3weWG43o47jj1nhLjkjpcZbnhQLjvLiFvk0Pk+9oItW33OaWMCvChpkozmJqPGqLxAt8XZhM6idTgIIQkXGctCH/OoErk3AP0BK5Z7tI49yYE58gRgLvuKh37//qvUuzHhuMMitNO6/8xxw/nsh/GtbhYOvu2yKccfox9QaUeKkUJexxq4xP2tsCXuvYSl97n1GNxnyMcJ2Y9fPye2k55SmyK0JeUGxSS/g3KoY9pK0WwUaLf5bQD+MNqAMICnCPGDZyBgl3W7rtL5Nt95bZFHR+1n69vXNc+JNKbJOKpNnTw0Dhyhu9I9S9ACwR+CnhwECP4fUjqaMNgoJZYMtu5NyLfrIpjXg8dArjvvtK5v+OOVRNom38mAUZOO23lfWE79nYoknm172SYP9vVx0/T9DmdlLDHr1js+4Q9R9xzwWLZtU0JfVvoSF370ORZ6r3EsQ8Je+rz8onJkNCMEaKDHgBIviuD6iunXZcoDJW1y2EBi9MoEQ4gnyRrt82pP6Vem9DgwbevNI/aBggL/1A6roPbP05+iCqEuor/7DQ9PAeghQLfFbIT49Cn0AbBLkU6PyFYBosl42Ib1/7w4VLgTk2VoTcHDpRr1xvnft++1avjdLuDfXx8BEdmRmFubsOxFSo3QXcLwNZdu/of3GWcfXNuFhZ68fpmnoJvTXbqabs+gc8NzXHdEXDBEe3m8+M4+b6QHI57L3XtucI+l6hPce5zC+Ec9YXcz6qpuh1KlHMJCXuzjR1xAPna87
58qmzuelNW75HkufqI8bn2MY596iBAAq5H6t6ruK8crjkqMVF9S8Sb7RBsgT+omcIc172ukdQwinoXUXH2Powgs537ubkyNv3w4TI055FHykm0Bw6U7x977FhfLFom7gEANm/uP3979hQAMAYbN26FsU6nPBfj473X0dFS3M/O9tarxyIYg0NlKGEfcvCpybWUsPdd8PB7Vx4l0PF7yURa6ry4Bgeh/oaOQSrucwp7qYCt2w0f9J0CF6nrwbtcaJtQyIfEXfblhSaxSoQ9x20P1ZuLVOdeKvrtsgBxx1PFebBRcd56fNo5djUeruZlC/xBidu6Y/BD9cXSpsEB94FWVPoKtvAyYSf2w6ump0sRf++95ZNn/+d/yteUNe1bwpEj5d/ICMCGDZth48bNsHnHDpjYsFRmHDjQm3Rs7nrYoTuuJ8661rDHq+m4hLx5Mi+eSOsS9i6x7tr2lcH12OI8JOrxq8+1p8q70vA296m2nO3YdGkZFzkegDRoqlzSEuM7X64Ha7mEvcStNnCeMm0TEp4utz3UJ8mAIuapt5JjCJV39RWg3pAdHxx3PpSvg4BGUGfIDldPti5Ep820bblNzgCKBIslO+bejrs3ITr795fO/X339Yn7dpwlOc94Rnlkt99erNzEmJ8HWJocha1btvSeqmsuHHNzPcFOufi2ALcdfGqZTPwUXiz0zb4hx54DddeB2sbpKeLe1Q+qPSrftV9ucV+Xg99kIV+nQE/F91AnvOINh7od8LaQ4uJL6pOmc9vz0fbPRmkd0SE6dYXspLrwVY6qpLdW2iDsqUEIx9EnMbHjs7O9FWOOHClj680Dq3bvBrj7boDbb+/f9+yzAb73vfQDajCLi+WDeA07dwJs3z4Cj3vcDpjYtq3MNIMhM0haXu49NMvgW0HHtX49FYbjegotxufG2/H1g3Luzf52HlU+lIbrtOvF5TjbkjRfuqtvUgYlsFPuPAwC+3/BPmdY7OMQl7oFd1sFZIqo57r2OR37XITce429by2c1QZdZSidKF3u3RDt4A9SrOYMu0nB9WSzYUJ8jrFos8NLpqfLv4MHyzCU/fvLJTAfeuhYWxZDLu4BAP7X/+r/vtx8cwHdbvkw3aIYhc1btvQmFdsr8VC3t23nHmC1sA+Je8qxxxdO+9W8D7nxrrJUuuuBX7h8iFDMvaQuql5q3zrFfYywr0PMVy3cq6o/1gVuMjGhHsNCDpEvqTsHMeJeUQIMfQy+KVfHYCA1Rn+QITyhtlmzt7HAs9d+n5npifq77y7DcX72s9LFv+++rMfSVh5+uDxF09MAk5MAJ520CSYmN/Um4k5P9zv4eB18HHsPQAt81x+AO2YVp1FhWHaez7WnBgvcB1jhflHuPdVfXAfnvSssRyruU8U+7ouLHCK+SpHeVOc+l2Preto0hZ3Hee96qnXI1XW9t6HCjqR1DBKu6M4l8jlIBHusmG/iZ6FUAtawXE3b6hh87pNtB+30Yzgiuk64t4VcaX3g0IzFxVLcG4F/4ECpZB95pHydnx/C+x5x/OEfjsD11xdw8GB56jZuLE/fjskt5QRYcz7tWHyXUMdiPrTkJfcC6QtvofJzu+ghcjnVseEvnONpkrivSnQ3Vcy7kIY6YFHsE/dccZci7mPFnqvutpNDtOcS/qluvDQ0J6YNpTZSlsq00zmIYvAB3HHnvuV+QmVxvWa77rXtUxi28BzxMplY1BsHf3YW4OjRcjLt3r2lqN+9u1z3fs8egPXrqzuIFrK0VEYtHVshFLZsAeh0xmBiYgzWTVhLjAL0O494YqzErXe59pQL70rjOvVUPte9d71SAtgnMDnuvQ3XvecId6nwDYl7qbDPLbzrEvKcdmJFjWu/VGGPt2OEvlTcS9/XUXduUhz7VBefI7Ylg8WY0BwV90NPbMw9RuTgc8NlQqE0sfUA5F/6MhehvsSsdVoXvv5IH6wAAL14cROaY9aFnJoqnfv77y9X0Nm4EUamptI6P2S87n
Xl+b722gL27y9P4ZYt5fVnx8SxwRB2xgHCgp4j7CmotlyC3s7H7zlCWqkec/enqfX52qmjLm4YS53CHrcX2p/TnmvJzViRS8G9O9BUQRpzLlJCcXKIe6WRSLVqrrDyxsfgY3wDgLpj7iXgCbk+XHc36kIcmoMFn1kO04TmHDlSLn/56KOlwL/nHhgZ8rXuU3n963tCf9u28rRu376h/CZQolso7oNPLXaF24SEfci1rxLKza5zMFGX2G0a5rvVxGNPEayuZ0349o0R9ng7xbXntkfVH7M/d4BSNVXE0aeS08nP0aay5hjKZTKbJu4B4hz8Jk62XQUWckbcT0+XoTnT073Y+6mpMrxkcnLoH2aVi+XlcnzU7ZavGzdugFF7Qqt59YXmADiFPYD1/+ILheEKe9z5OhkZWS3yKdHtEuKudLteXCZ1O8ToqD9MxxZS3HAdlwhI/byaKi5C/XKJUUr8priwXFGP2+YKe46DHxPPnxqaI5mfMCikA8EqnXtO+zH5SiOhNGEuDSsK0QkF/Evi6iWivYmCPUTTJtKmwJpYa16NyJ+b68XfGwf/6NEyfGfLluo7PURMT5e6fXq6vD5PmAm3lNh2OPfi7x0l7qk83751ERLBPlyCO6XOmDZ9LrgRSKH+xIh9qg8umujQGyTiJkXQc9Ikgi5G2PvyfPvUJe59oTk5RGru0BVOfZL6U8tqaM7QExtjLzXWs8Tg54q559DUGHxMSszVIOP1RSNJ+4K/aE0AnZ0tBf7+/aVzv3dvz7Hfvh3g2c8G+O//rqD3w8dJJ/VO6759ABMTAPC4dbBhA8Do4tzqW9NI3OPPExfvc+99E2ipdJNmV14HRiD73PCQc895b4t87OID9O8j2Xal2elUniucghL+KSESrsHBoEVG7DGFVofhhJ+40qRlYlffqfPOQGxeyjwFatuVxiW2vlxufMrdHk66tIzSeqR6sHXLZFIH2MSYeymuGH3f6kQ5206q1wg++6FW8/O9ybWHDpWTajsdGPnSl/J1fA3wwheWn8vXvlbA4cNlmrkBMrHOcQGznHuDV3s3MdyGwifoDaEwHU4dkrolSJ17n9i3oQRsyt2HuuOnU5As7egTQVWLfImrnUtQctvMIfQxUnFPUYe4jxXdoTZz1avifs2RU8tGhei4FtyXLn2ZS5i3QdznEOlVufmcdVZXfVZUWI6xmqenS2G/f3+5LOa+fWWZk0+upP9rhYceKqOcRkfLaQwbHj8Go92i95ApgL4f+wJGvAbxqs/TF3uPy9jkFv/Yoec49ZTbbotnSuTjfOo9Do8xosmuH+9HbbvSfOncfIqUgUxV1CFEUgVRitBPnZQr3Q61H7s2v7RsqnMfWyYlLVSmTqdenXvFg8vw5cAS+CkC2rdvG4R5CsNwZ8HgfXotdvCN0J+ZKd8vL8PIjTfW2+Eh46KLyn/qr361gLm58mG2i4sAnfERGCF+5LlPeXbG12PqEPYch9wn+l0hNXjfmPeU0Lfrx/2hnHmXWx9y6l2DghBtuvjn7GsVIt9VPoeo55TJJexDdUm36xL3FFW59HWKexdt+t9VakWiKVkC3whVIxqobVyeek9tS2lLDD6AP+wmBe5dk5RVj1jn2Bb2toM/NVW690ePltvbt0sOTwlgVtWZmCj/NqznXQxWXTNcoTm5J9G6HHnbfQcoxbPtvuOypm1fntTJt9N97+36DTgMRnLxlswdcJVzkWvgVZfIqFvch8r5QpRyTcR1pUkFNdWnKgUt1Yccqw250lL2bbqwV+deiUBiHGdZRadKKEHfdGGfm0bfCcCx94uLvYm2JnRkcnKgXRwmut3+0zw+DrB+vez74QzNMds2MZNofS57VWXs977JsVjAu+rA76ltX/iOvY+BcvM5eZjQ51DFxb9KQTEIgS8V8aH6c4p8Kq0KUV3H4MKVxilT58CpSjGfW9wrChNxiE5OJ5pLY8UtIsZZl9SdczsJX1iOmVh75EgpfLpdgCc9KV/ba5
zJyfIGiZnm0O0CrF8P0O2OyK6tlLB3iXmuAI8V61iY+0Q5dRyUQ4/vCgDQbj5+nwtqkuvoKG+FmkEIeA652q1iEm+uybahMr59U8U/R0BT++YSvVRaLlFftYCP6UeVol+S5kuPLacMLdlDdGyqmig7DPgGP9REiZjJyo1aR98W+mai7dGj5eviIsD69TDy0Y8OupdDxW/+Zvn533JL0RcVtX596eaHyPK/6hPyuIxP0PvcdyzKJXXYwh27+Xadpgzl6OM817Zd3k7DIgj3wQWevNsEUgW5RHxziTlHOUIfcgl8gHhBn5KWc7AR215Kv+pw8GMc+Zg+SepTlAiilskMxeDnpE0x9xJCAwBuHoYaOJj9XAMHqm94vsUKdnw2dvGPHCmXw5yeLpXn1q3evirxjIz0FiwaHS2vEfafF59z7yoHIBP29ntKeHNEvjlQSvRSrjvHlXc5+jH4Bh5UWbufOcB3CaoQ0gDp5ygndfQldSAQE/6T6v6nOsgxIUBNSsst8mO2JWm+9NSyinIMscAPOdE+Yt3+YRL2OQg9ERgPwKj9Q/UbWKvnGPfehOqY2Ht9Ym1ljI725jXPzZWnPEukCRVz7hP2PvEeqsO3LzXxNrSPy5XHjr7BFsg5l70MwRkocevJRWxdTRL/sXVy9wndycj5QK1c6SmCXlJnU9LqEPqxZbh5KWUVxSL5QVfRk/uGAEpoA8hCaQYVk8/tG4nt4uOHW5llMd/3PnF7Ch8TqnPbbcWK2B8b610LyLvu1J2YWCgRH8qXDAZMPp7I6gqvoQjF7qeuiBNy633nmKo/l7Pvqr/KOlLbq0rESMOL6orlj82TpruOn+vQu9KrcK85oplbrg7hL+kjNy+lrKIgWvck2ybBDauR1JNKrmUyg33CITrHhD0AlAHhSi0sL5dr4i8slBqV0tFZoJz4kDjn5OODCTnvIaGf68Bd4S/U2vqmXQx1fD6qvJhX6WwDpMXpVxVaZKj62LnlQ2ViRaFExPvqyi3QpWVzCvrY/aoYuHDyUsoqioOsk2xzTA51PfVW4ZESQiUCx+KbGBEA3mxPJQtG1JtXG1Lr4vh7DOW2c9x5/F5S1if+cR9ck3CdB0ycDE74DYXdLkYq8HK69ZI+uJAI9Nwudyy56q56AJBD/OeM6/fltUn8Vyn0JWm+9FBejvKK4iApBh9vp0wODbXTBkKDn1yrD0kHVbEhQGT/cOy9HabT6ZTuvTr4tbFpU/lqpkGMjZXbnU7/dSLp/8nl3puGqXQKl5CWuu52e7bTHlqGkuo7tR1Ks/stxfQ3h2udUkdVbmKKOKlL2NTdR+7gifN5xgrJqsV+rvQmDQRi07n5qeUVJYCG6CTAEdOhp/r6VrcJiXYs2nKvZhQchGChD1AuzL5hQ9Z+KG663f6bKUVRQXiODeXSm3TTCVyOcug56ZwyOI7e5ey7+mio9KRZuM6fr3zOtnOXbbKIydVWXeFHOZz+tTgAqPPugi+dm59rH0UJoAI/gVwP/QrF78feJcEhOtmXMzW2sXHwAUrFuXlz3nYUJ91uuVzm/Hy5baKjuin/2ZyQGeq92ddXLqU/vvfUNnbL7ZV4DK47D6E7EnUNCGKoO3zEJsdDrKqOybfJ/RlWLe6GZRAQs8+g0kN5nPxc+yiKABX4Q44rJj9K7LucR+Pgdzow8n/+T2RPlRjOPHMEfvjDAhYWevrW+jj8+ASyz6l3iW2AuLzQ4MCX53LhsbsPsHrirGsd/NDJa6K4r8JJryouX9KH3FTVZuoANnfZVEHaZKEfs08VgxpOfu79FEWACnwBdT9R1rcSjnRisyQmn303otPp/0uyjZVY5ufL02/Ww+92hdePkEPuE9E5wlx8YSuSPF+/cL6LpaX49cyrItYV54ru3OE4uc7PMA0AUuuvYgDHKTeIuwFV1ZtLrKuoV1qCKjIBlBteRRspy1v66qoMI+6rWBlECRJjQpOVhBx7vB0KkQntazqKt2MHFJxYex+U64/xra
STm9hQlUEI9roc7JzU3W7d56jOz3dQA4XUfbllJOVy7acoiajAT6CqlX5iY/A5+2abnGu79mNjAOvW1Rs7q6ywfn2pOUdHex9LENudp5a2BEgXzLnxOfb2tknD/fT1nzsiyjWQzXEOc4d25Bbyub8nTRNKdfSnzkHBIAZ8dX4v6/zuNu27qqxJVOCvYcQhRy4xaAS+hugMBBOSQ+lZsYtvduRsx+wTcuClIT8uIS858JhlOgdBFX1Uh3Pw7WMGGeI0qO9YFeWqLFtlHYqSCVVkAZImpTYIvAY/le9bgpPEOPjj4wATE/rjNiDGx/tvqHhxDdJCoTUAbpFOpeW6WHL6avfDlYb7aOc39XtbhQM7SEHfdMe7KVR1DHW70nV8P+rap8p6FKUiVOAHMEIXT3Bt8iRb3wO1fEKfvZymUZNGUS4v91SmUjvGwR8ZEYToAPSLZYlwttNzpnGceMngwSX0uflVU6U4UQc/jSb2KZacxzKIz3hQ+1ZZl6LUAEvgp7rYLvHZNnKtex/bZo7tmDb7MEIQW8b64zcwRkf74+9Fc0CxyAcIi3pXenKMkKdvvjRfut2vUP6gqMrpbLqjn7rfsNLE89FUoVzFuWri+VeUCEQOfqqobaO4b0uIjsvRz73PCrbAF9nGSm7GxvpvqgQn23JDcgwxD4OSTnKV4BP5uC1XPtWvVOpyGpsq8KVlc+ynVEvdn0td7en3TWkpEqOcJfDbKMxzwF03vgngPvr67Po8RbH3RsDp8pgDx76J4nLvV322nJAcnIfzXQLelecaCFDlfftw8Dn6VNm6aHJcfRVOfe5zO0zCrO2/m039LJraL0UR4tJxXK3GDtGRsFYHBIMk5ZyzY+8BesLJCHvzpz+qA8MW+FFLZeI0jE+422Vi8jihNbgPVD8kjv0gaKpD3xQH/xhtMFNyMAKOpygrSoto2v/rsGlPX/QI59xXMsmW03CbPogqQ3RyTJzNCStcx3bwcbpSO0mnnTPRNEZMcwcFnHxfGVOuid+9QYj6AYTlxPw2tt28Tqf/nNX99W3T9Vepl6aJdgl4UZK2k/pZZBH4w3AibSThLjnb8rWb0ifu5yOOwbdfmyqy1gD20pi+ZTK9A0POZ5d7WcmYya9NdupzC/oMZTi/E+TpJ9IkojxVwLdlANDECCReHfJrWlP+zRSFg2s58KZRpb7UZTLXACmOP/vL19Gn2A4KPA3CdyFOuvvDFdy+TkgFOieEqAkMQNyH/jdDIpkrousS9ktL8fsOCrvPg36QN+dmHGd/FymfbVP/bZW1RdSCIhVRx52SLAI/taODPtltviXlQnJOKdHHFQ8mBF8ZHLZrzxH6Ob7vK9+XlEmjnC9OU5UBs1/cc73qVCwH8pl53DJccV1E/FS33c3nfgV951D6NZYsdWsGFqlzyiVRek2hqf1Smk0TlmyXaq4YBubgD/rkKvFgcb+8XF7cBu1grVXsuRp1XfCyzLNJ7axU+WU6OdGivYIyqcKdK9ilpzpFlA9a0GO4d8dCdVQ1V5k7OJMMBDjTbnzlMVUNHto4KFGaQRNEvqEqkzm7wG/KCZMgWkWmJbieUkvhe7otxvygLi6WrwsLpUhYXGznLfah4NiHMXJs+ZzmTHqseCJhZFhYzLHmDFNJEeUhQZ7adtXhODm+Z1V8V3NPK8lVNudCSeY3mwLfMaC+gz4DJ3YQJFnNltrXh6ReHQysTVzXyip07CD0ZXaB39ZZzMMk7qvGdvCNuF9c1B/JgYFO/ohA5NfpljbFma3Kja7STU9x8QcRix9T3qZus8DXnuTOpEToclz9OtYu6HT83z0j/rnnKHVBLldZTM759m2Z6qNUT1W6dRAPTK00RCfnwXCXk8Rlqe21RFWx+MvLpa6cny/fz81piM7AMAK/01kJyA+J/NSY7qZQRygIR2ymOOt153HyAWQiOyY2n9uPQUOdB4kA5MTT+347fW1xHiMheXZdqD1Xvrk7IJk7EKo/JiwoV8iOdO0AZT
io2pSuI+7eplUx+Hgf33aorM0wi39JnJnky7e01Iu/n5srQ3U0RGdA2AKf8ZQr6uJVpzubk5h+5JpQmiKiY/IGHYtfZ0hOE75fksWdOPv4yvgEsvnsqEGAb+XaUF4Vz6VzfY9CdwE4dwBSn22Xw6VXp3+4qToCZagc/LaE6Ay7wx/60koGAUbUF0W/uJ+d1R+7gYEd/GNXaOp7jS9QMWIyRJXiLGYQmUO81umyu9JjY/PbFqLTFHEfM4eb4yDHDA46HTqG3iWcXYLZ5dy7xDM33eT5BgY+OOE/nPMmObc5QnxU8A8vPk3oevBo03TkwEJ0Bi3+XeE8w4zrnLtEfwg7Bn962v+QJaVC5ud7An98fOWqIxX3OSdgYlLv7khCQFJXpvHl5xbzrvMSI9irDtmpe1LtIO4Ipiw5WWWoDCe23063BwP2XQHX3QCfoJeIeWm6aZP6vnNdf45Q5wrxVMEumYOhtIOQVvVFjtgMxSRbLk0R1U3pR1XEDKR8cfeYxcXSxZ+ZARgbEzel5MB28B0qI0bg5Y7TtqlyPfUmuO0AcvEubaMKoc8tY6jqjkqImIEDR3D5VpoBiFttJla4S110n7uO01whQdRyxz5xzsX10+RL94l+01cbX5iPT/Bzw3pixb4K/XaS24SuO/4eoGaBT02UbQLDLPKrjCkz4ToLC6XIn5vLWr3CZX6+dO4BnMrHFz7AcfJzrafuayO23KBDZCSCPYdYz3mnwFDVevix++Tc30WM8OLsExOHPjrKF8CuOwJUui+N2heAFv/UE3slA41cdwhc539kxD/Y8rn8Me5+yoRdFfztgNKFUg01aG1Zm8A3J6ZJwt6wlsJ0cmF+tIzIX1xsRuzsmsR8CNzlJzxFcHrKmuzMrlQaUuPLkwxkmiLipYOvlNh8aTlp2Zz75miPI7xSXVnXfk1coIDqq0tIU/uGBhpUOVfZkBi3v+ehgYkhNECh8kL7cshxR0QZDG3TibUJfOmJ8S11qctkypAOqrgPvjK60vzYhW5vKxVhJtd2u6xVdGzsi5n9Hl/EpWElVefncrGp4+IK7lTBnjrAqCJEpwrRn3PfOohdCSclz+VW499USmBTLrXUzZe47jjN5/hz3P7YNBfcgYnL3efOi+Dku0gJ/VHqJacpPVSr6KSAT4S97cvjbK8lcn45Q6tELC8D/L//V8CuXQD/+3+v3XNeJ8U996xeJjMRjrjPHRYTm5cq5l11cMKWUkKeANIGHKl9kuZLy2Gqcqjx+cqxDntVSOLQXWnU8VLCNVSfS+xz+0HVRbnoLqEv7VdqmulbyoRjkxeK2Y+J1VeRvzbgmqe5qD0Gv6kM6yCgyhh8YxyPj/dW0FlcBDhyJGszig9jn5kVdIyTD7I7V+aiZosFLCaqCkeJSc/pxucqU8cdAsln4MvL8fAuST/qrMMFV0QtLYXL+gYS1Hr1KaEo0jj6WEddkiZ1+rmTeqtw9Kn+4D7YhJz90OcQ486nhn4p1RG6hjY5QmQgDr5k3XWl2WDjeHGxXBNfqQnqIVeOq4TLQeSIqpyOdkx6SuhKjICvUtDXJfBT5k9IhHYuUd700B2Da4UXgHhHPSXNJTx9jjrVL0kIT2yYj9Tlz90nqo/Y3fc5+5wJxaF9fKib3y5cc0ubIvhrFfi+MBulGlJGl9R+5sfHDvs2f8vLpbifnk7psSJidra3gs6xGPzYz9tc2OyLsFQIpwr/KkVzTN9D4UpVnZ+Yvvj29aVz86Xlqto/hZQ4bpPuCrNxpYXi6DnC1ieaOS471c/FxdUuf0jUcgQoV6TiMrg/rj5VATUg4n5GdnoOR1/d/GYRY0Zz9hm6EB0JOsk2D1Udt/khHhnpiX3zgzQ/X0mTiguGe58D+0JVpXAFGJyor1rQDyLUx5fOzeeWqWLfugnNNYot73KxOUIyxp3m7MNxr7ntcAYhof04sfI5nPvQwKUowvH6VTv66uYPL3Xp0dqXyUzZR7IdKotZC0+2zRkaNTrac1
dMHL75MdIY/JqxV9DJiEvQS4R+rBOdWzjnFPJVDyI4dUjSfOmhPEOdTyLOje/hSDbSNdNDMdkcIYy3XfHzPmEZ47JjcHlOmFEMnMEQFQLFWZ6TGgj5tjl9tdsH4IUT2ek+R1/d/HYgicHHZvOgGegymYN88JVL0Dflg6mCmOUyfefDdu5xuM5VVxVwwgkAl146vOdzkBQ/+lHvAVdoci3G/hzNRU5yscsh9AHkoppTr6QPvvZztmNvV3HMkjRfehUTbUNtDgqOQHKdj5wx9JxtgLCzLnXVpds5nP2cZVKc/RyuPvd8UI6+uvnDBaWjQoayba7WqTEHGqLTBGE9zIIeIP2OBPWlNJEgdnjO+DjA+vXlnxH9U1OpvVecGHFvTvqxWyj4s7YnAbm+B1yxHyP06wx5ySXouWK+qgFEShmqH1R/OPWklKurHgMnLpxbnooNB4gP2Uhx20POOi4rHUyEtiXhRDEhM9IyMROHXdtUu6Gy0oGXK83OU5HfbHKYz77okio1aGNi8Otw8KlR1DCH5ACkf3nw/rYDPDra796vXw8wMVG+zs5qqE7lGPd+40aAbpd9KxFfuKRiiyOGJcK+Kpc+tk1uOa6gTx3EUGmp6/z70qVlcu4XCyUGQ+WlziqVVmfYSEhwx7aby9V3iduQ2I1x+an+cPpk90fi6Fcp9H0DAIyK/MGRK8R56Bz8piyJ6RpFGeHTtPip3JhjzPVFNU6++bMFfrdbCvwrrijgaU/TUJ1cFLffDnDoUPlLb0ZVx9z7Kn/8Q6KzauFbtdB25eUYQKQMWHCbVLuu/XzCMSQqUwZ9gyb1/0ASfhNyuGMddI677BOZEvGeQ+iH5gjY+/vSYvZzCX1Xn3KeG2oybmzYDvd7qyK/XnCkiXTBF2q7LmoR+DkEs3RVnZgVd4ZV2Btca7ZKsX/s7Em2Y2OlmTwxAXD0aJk+Owtw112pPVdWOHKk/3bJ+DgU3bE+QWo+H1esoHHxTXmA3j4hRzIkJF0COIfgTRX1Ma49R9DnOB7clq+9mDRfeigvptygSRFBsftKwmmofV3nNjQQ4Ah9aj+AdKGf4uZTaZLBAE4LrXwjPT7uoCDHnQ3fcVGoyK8PWzO6NJR0AZi6DOXGhOiEwCfBF7/PzaO2h40qQ5DsMJ2REYB16wAWFkqxv2FD7wdo//5Kml+bzM4CbNnSC88ZHydFZNU//iGhV4fgrqsd6UClSlHfBJFvSF1ZpwqoSaAYKsbbTsd5HEc/Nl471kWOFZpcMcsRo+YYpSv+2O266o9N47r5dh+kwr7KcyFBRX51ULopp/vuM99yUrvAb0q4DmZYQ3Rct5ckD2JwffFMaE63W4r7paXSWJ6cLPMfeKDUow88kOVQFMPGjb1bJePjsGw9SIf8wUdXEuzi28XM8qdYaLocRpMmEcRNFdzSNqTvUybhxpYJpUtE+iCXuuTAWUpRilTcu+rg9FHisKc6+iFhLxG90pAdipwi3/RJOik6ZsCV41y40ux6XajIrwZK71BRIFw9i8tS+1ahO9dUDL7S+xKFvpzU6NUl8kdGSpFvh+vYi7ssLAD8zu8UcPrpAJ/4xPAMnuqk+Nd/LR8R3On0ufdLyyNe902CfcEyKyRJXV0bn/gOlRmU8G6asM/t4IcEcIqIT/muuJB8l6m10ykosQfgdpeptNhtn8sb67DjY4hpi+PeS35bpGv3S9IoOCI/pl/cc03dSUgV+VxU5DcDbBK79NXQTbJtiyPeln7GIhlo2V9Ul7A3F6GxsfIHbd268nViokzfvr0sNzVVho7ff3/qEaxhlpcBdu4sT+727QATE7CwWIp77OYZVn5kzIdlqQbKxbcvFEYE+RwsitAE0JCjHxLeHDee4+Q3YdDg25+z7UrzifiQgK9CpKfA7Q8lcKhj9T30yvfwIpeja9K427bDbvrDnRzLdZFDbn5oQBHr8seEqMQIU9d+OUU+AP/Ohtn2nY+UOQq4HIWK/LxwQnRiH7RK5VW1wm
NrYvCrYtiXybRxzQBPwfywmbXvjZvf7ZZRJLOzpSZdWipF/h/8QQFnnAHwgQ+sjXOeSvGd7/Sc+4mJXnjOsdAcLF7N58H5scffffsiRYXqSBz9kBgN5fsmt1L7x7rYTbgjIMnzpUmWzvTVE6LKQUCKi8nZnxJ8oXqljr6kPhuX+ObWx92nynak59fXTp2E2o39jOsa9CgKRbTA565q04bwHKqPayEm35fnekCS66moJnIEoAzJWVoqDeeNG8tnMu3fD/DoowAPPQRw550VHNiwsndv6dhv2LDi4M91N8HyfE/Q2S6d+Sx6QuTY/6TjqjECxUoZG7O/qdNOo1xMFz5xGuvc57wjwKkjd92++rnbUjEf+1nVDdUHieDhCCSXCMXiS1In1+XFZSUOuzR8x9eOr+/SUCBcThKeMihB6/qsbXx9zfUZh9qx0+z6Of1V4omJwfeF6ITKUu3m0J3RXwdu4014Wq3CwzUYw19MjPlRsd37devKGHx7Puj69WU5fcKtgOXl3rJExyY1LCyUT9csivJiYX78i6J8j/9i6HToC6B9ATGDiZxInPscdwQ4dVRVt6ucbxu3R7WL6w61nfI9qZKY7xfrzlVLL0M577ZwJyNzv8PDQO7j89WHzz/nrl1qm0o6PsPaF4YTU38OczxbiE7sspVNA6+LT61fytmWrL8vKUttc4+Hc0fClOfefcGx+KOjpQ4dGQHYvLkU+zt3lq/HH19Gm+iymQI6nXJZzMlJKLZshdlZgLm5Msv+MTchUouL5bk3y5f2qll9d4ZqimJpieckxq5gYuqJce45gh/f6Yipg3rvmjwb49rndOy5g4mmUoWoBwgLe5ejW4Ur6nNbBxGqw+0bp31uO7mp43OSOPd4O5eTry59tbh0UigGn1Mntc2th9KBIdgCP5fIxNuSJ4HV9VQw7lKSudbfl67VLxkkUctkUmVC6aHP2/6B63bLVzPp1sTib90KcPgwwK5dAFu2FDA7C7B/f7MHfIOi+L//tzxxGzaUAn9iAubny3CnxcV+oWaEvXE8bcE9Nta7KJi/0I8EFWtPhepg7Lh9O3yrblHpctddZUL5IRczt9CuStw3lUEJegC/8HS1Q6VXdQzKaiTnzvUdqHLAIRHhMYOfUP06CEhDqpOqbDMVkYPvEnkc4Z3y5C+8XUdcfxvmDsTgi/eixDxH5OMVBMbGeiK/2y316fJyGUq+vAxw+ukADz4I8D//U9VRDgGjo2YktBJ7P30EVkS+Caswot04+OaH3WwbsNAHWC32TRp1gVha6hf5NtRgIGW5RV9ojkR4h0JnYp17XLevD1R6Fc69ZMAySCTCI6eQBwgLKV97KWJfKrbsfqpQ4yE5T3XcqfHd6fRNRA7dMYhpX5HjMjO5cfaxMfi5EQl8zkFT5TkuPOdBAIMC33HIETrTBHxfMF+oEoX9Y2Kc3G63FJYbN5Y/OI97XJlmYvAPHQI4dKhQF9+iuOGG3mo5O3cCbNkCxeRxcPQowNGj5Xm1HXx7Um2n0wvPMdtG6JuLigndcQl5jJ2PxaZrIpidP0j33uBbppKzP04PzRPg1hND0x82ZRiUkOe4oZz2qhL7OA0fU8wdBZzHPZ91hM2E2gkNhlI+qzrnWkgFtXSd/NT2FBkhMxu/l+RRZaW6kauNK1kmMzYcJSUUJRbuspGuDy8lLitETEx+yqAjNOeAU58tKhcWyrSNG8sfs8c9rhT8U1PlD9Thw2XoDqy0ubaFfnHDDeWJm5ws/7ZvB9iyBQ4cKF37ubnyvC0ee3KtuTDY4TedDh3D6xL6Bq7Qx/H4Js++4FADgSonsKXWHSvGc4fJcNz7WELhVTnqzlW2TiHPKVeV2A8dZ0y7seVc++RuJ1WUpg4Gpe59Sn9TQnU0Hn9wcLVeVW3a29xwcRdZQ3TwdpNdeZvUPoXuTJjt2AFLznh+V79xeV96KFTH/OiMjZXv160rX7dtK4Xm9HS5MIwdN37ffc5urh22bStDcnbtAp
ichLmNx8H0kfJOBw7NAaBDbrrH/qONoDeC3A7h6XZ76djV54BDdVwhPaZP1ARAn9BMDfGx27eRDApc+bEx/TGYO2E2rnPDHUTVIQhyiPiUcBpuH7jlYtviOsuc1ap89brK+gYUrjCgmLsE3HZ89cVsu9Ko71auuRYp4N/JqtfI1wFAHK5Qmzpj8M22JIqCIsrBd8VkS+PqhwnqA8L5TSTUL+nAxBZ05sdq3brydePG8nXbtlLcHzpUiv3paSPwe+fv/PMBvvOdZp6zXBS33Qawb19PmW3aVJ6cbdug2LIVDu0vzw0OzaFCYcyrvcKLHapjO/lmboQt6s2EXLs+u37KoXeF6LicfNxHu97lZVrQNoG6woxS73RU6dJz2vXhE/RVxMZLy+Sqy5fPEZ6xYl7SNjfGP0Yc1hkWwxX30jpiiQmtqfrBYCry4+BoHonobkUMvgGPLux0ANmyj1Lnm7tvnYOJqm7jVBHrz50oHZp0y+mDHRbS7fZed+1aWdId1q8v/2ZnAe65p3zaLQDAd78bdXjtotMp4+zNAwOOheYcnF0Hh+4DOHiwDHOan+89PMwW+NTFfnS035E3n8H4eP+2eV6BnWYv44ZDeGzwU25dQt+Uwy6U2QcPPrgOdN2r83Daqqo/LhcfwH2Xw/W5cftXhbCLjS0P5TdJ/MecA64rHevo++4WcOvk5nHbSmnPlZYrJCd3nlRgh1bV0VCdeqBCY1IMa+miMQMJ0UlBMkE3R8w+FTvuaq+pUPHwri9D6ijQ9Tm4Jk9LQnXssJHx8d6k2/n50sU/eBDghBPKtLvu6oWjABTHxP9wOfnFj37Ui08yTwA7ttb9kZlROHSoHOjMzJTFTGiO7eBTF0Fz3u3wG+PWmzL2NkDvwmjfBeh0eE66CdVxrfRg0u2wkpDLbASt2Ycj6Km8Qd4NkPQ1tA3gPpaQ0KfazkVVS1AOIk9SxlDF8XPSJMJYIri5dfryXG356g/Vw9lf2ofcdy2qRMV7/djiuu3UFqIjnVVcFYP+0FJj8ql9JPVIQ3JsMY/3D022xRM8N24sBT5Az0E2y2hu3Qrw0EMAJ50EcOBAKfSnpswk3CHDuPXr18PSxs0wPV0K+ul7SlF/9Ggp6mdnewMhe3IthRHmRtgbB96ce/MQMvu8Y3ffuP+2s2/q9gkazgXIDDpcFywqVMcl8n2inyuuXe9D9dhi2zdXwFc/Z9ucE7tdg8vR9xEaBMSECaROdHXl5dzHV1euybyctjjlcol+Xww8V7DHDiSqDjniOPe5J9KmDA45++aIxfcNBHSQIKMJ5nCutmpz8JUegx5khHCJfBvp4MQISIBSUBrBOjFRCtidO8syMzOloW1WjpmeLlszbN/evodjFVNTpYI3Cn18HGByEhZgDA7sL4X89HT5av7ssBxX7L0NduDti4bJM8LQ1GXEtNkHoCeoAXo3Gew6JEKQEqohke0T6KG2XPvhOwih8+gT/gBukZ/q3AOE00z7ALI7FDlio3OsXFOHeHelp8wDCNUdWzZF7Esce1w+VWhTbaaIew6c8rnXuE/5DJuEivx0BhX2nUJWgZ8rjj423pzbziAFds47Fa7VeyTny94Xr5Rjp/nqcJXBrsO6datd/W63F4e/c2cp4KemSpN73z6A//gPI/JL9u9nHVqzmJiAYstWWF4uj2V+HuDAPaWQn5rqheGYp9QaQb+wUIpH27mnBCQW95SDbxz7+fnVDv7CQr+Tb4Q9dvTtgUCMY4Un53KcKGpCr28/fCfA1EG57VJH37QDwBP5eB/Xtq8MLof7MAhSRU/OPCpNEo8d25eqykr+rySCXrrty6uyXc42pw++fX3pobzcZWLRWPzqGLQBm7P92hx8yWQB7kRQSoTidkJhRG3FN6chRx25z5MtfEZGestkmpjxTZvKvJmZsuyuXaWw3L+//Nu3r7+HPZrp5u/fX6wI4oNHeuLdCPxDh0qBf/hwz6VfXOyJejskxxeaA7Ba4APQDr45/y
4H3wwOcD123XZeqE+USOWswBMTcy8pIxX2nDsKPic/tO0qA+Af0Lnyc8IVDDkETw6nPmalmlB6KK+qsjmPL8Wx9/WxCeLeRZvFfczDr5Q8DNoEzkmywM8VZ5+rLGe7qeSKyc9BaNlPm5Dbb4s2gF48eKfTW0Bm3bpS/Nrr5B89WsbkP/QQwA9+APDww+VfG9izp3xdXCx/rOfmegJ/cbEXX2879ktL/WEXHBFrv9rOvXHh8Tr4i4vl+9HR3nMKTBm8D155x16hx47Pt/vjcr5tXCLfRuLk41ech0Na7Lh+3G9XPdR7XK896RULdNe2JA3nufKrIrfIjRH8MaKXIxq5aZL82PLSUKKY45Nu5w7FySXsU1Ym4uRJykjKYTjLZWosfn0M0hTOOcBIFvjDNNoZNKHzmOOhWblxPRgLg4UTQPmDZoTi+vXl65YtZdriYpm2sFBuHz688mBX2Levt5wmONut5/zccEOxMnEYoOdwP/BAuW1E/Nxc+SNuT5wFWB1fH3Js7TRK2JoLhVnT3o6jN2vg23cEbAefeugVFvL4QmTKcp1uLNqp1XVcIt31hFyfuKf6hSfv2nXFvKeEvmvVILwdSsPpOM8mh5sfKwDqFv8+uCu45BD6Mf2sagWiKoR1irCPaS/UB1dfJPVx8zj50rIqsNvFILRtY0N0cq2DX3UMvr3dJiTx+zHnk5pc6+qDKy+0so7N2FgpSuxVdjqdcuLt6Ggp7sfHS2E/OVmK+v37AXbvLv/uvNPZVG3cdls56Oh0eqLaXkPeOPMm7IYS8rZwNRcuW2SbV3Ohs8tQZe3zbFx6uy/2ecdC1NxFGB3thVGZeH3zvtPpCX08IOAORuxwLc5FD4v8kEtFCX8DXqHH3s/1nhLoWJjjmH8A+lkBXOfN5+LjMqFyKeSoN7dQMsSsve4TmRwBSrXrQjpPIkVw5hDcuR/GlbJPimMfk87Nr6IsdayhNfFjUBc/TNO0IbVcOheWwPcdsPRBAJzYeFccPQ4ZoSbODku4jhRuTH6O9fIB3CvtcL6AlJAxP2RmVZ2lpVLgA/SehDsxUYrObhdg8+ZS9O/bB7B3L93TEOPjZZ1moq957XR67XQ6vVe7/yMjZR/uvbdc4rPb7V+KEqD/R9u3TKEt3G0Bix84ZYS0PSCgxKdpj7pomIEGQM/NN+Vst9+IX1vE4oGF3Y7pZ+gCgsvgCbAuNx5jBk2uwQM1mDDv7WU47WO12/OF7Njp9vnwCX3TDj5/KeE5rjJNIKVfoePnfM85/XEJ+xQHOdRm6j5SN5qbJp04S6XlEPpUX6j+uPaNSefmc8vk3C+ENExHCdNmExgjdvBTDloitpuybn5TqCMkR9oGZzlNF9QPkB2Xb1Z8ASid/bGxMrxl/fpSWO/cCXDiiWXozu7dAI8+Wq6db6+4E2J+vlyO0wYLfWoFGeNoHzxYvk5NlYOQDRt6At8W7fa+lAuPxbzZHhvruTimvHHeAfwTVrHotEWTvVymqQeLT/tYTZvLyz1BZbfR6fQLfZznwxbbXJfbFum2OA/hisvnDjB87+1j9YXuAPQP9vA+dprdb1eeq1wV1C0aXEJFKmAo8R5KixG8nLzUfWLajRH01H6xgj12kMHtVyg9JU9SJna/HEvYKvlpyuIsrXiSLQA9cdMXzpOj7LAgPZ6q4/VzrOJjX1hN+Eq3WwoiI7QBevH409O9h74eOVIK/ZmZ8vXwYYBTTinT77mnFN1TU/zjMY69ebWXkbQdfHvb9Nle+cZeWhJg9asRwfbTZm2X3j43lJCxBwp4BRz7z5UPQLdngx1+E55DCXrTR3N8RrxSYt8F9WArLtTEWYngd03ANfmh91SdeKCCH1JFXdC59bnK+OpoM755CWbbdvF9YQ0cce+KOY8Vsr5+SIh1n1OXDM0l6iX9qyO+Poebn3M/ikEtg6uUNEVDpkShiAV+SEz7hDiux9eGXRfepupwlR2mGHwJvjscvmVGQ/VRT3mLicc3+H4Q8c
ovZjLr+Hhv9R0j+mdny1j96WmA444rBf999/Xi9l1PxDUrxdjC3ghW7N5TfwC9ibIAPefdhL4YIWKEB3a7zYDGpJv3PmLFvcmzz73LJTfvzfHZx2UPchYW+u9GmHxzzBjXsYWeXmv2dcXWu9bYD72aspywHdf54eaZ48R9BnCHc1GfD0YyoGkSrv5IBk+UyOfCEffUYMDXx0GK/JgHecWKeipNIsCrfAJtncI+VdBX5d5rmE4cTVrAJAfJITqxy1vmXBYztn/DDOdLmmvN/NS6AVZfPG1hZ8S2EfbmoVDm6a+Tk+Xrtm2lo79jR7lqzdRU72FSR46Ua8+bfY37ji/y9qstgClhbwsDW1DbAwNbyJtjMa9GNJty5j1eltJs23VJRH0onxIzFNQEYTNR1x7A4D/b1edMHKNEPhfOEpwmn4pnt8OW7PQ68T2hVtqXqifhSgYN3LK+QQw+HnsgZUS++Y75JlpjfBPYcR/we2rblRZCKvhC/0s5xHFqWcldBGl7qXmcfGm5ELlFvfQ3UsW/G0q3tDkipNYQHaVa8F2LOuG0K11lB6DncJv8TqcUwuPjsPLArE6nFPKdTjlpdt260knesqUU+tu2lY7+4cPl2vqzs6XzbK9u4+sLjqcP5YeEvXm1Rbw9OMDiHw8A7LI4xAe3Yy6iZoUfO9yoa/33U6v3YOzPwRZPdmiVD1NvqBzeh+vGYyfeNQkXgDcR1/TVtewlFp1UnmvbLm+nAbhdfgD3xGnf+eMQ6+5Twjtn/Xh/33mNwd7fN9ANCX7XfgDxgk4SohErYqscAEgnJ6uw76HhOUoOGh2Db7Yly29y6m3raCyEayUhDilhNq79Y8rgC6i5iBvhZuLBTVy+EfKLi6VLbz9Myjw11rzOzpZ/c3Nl2tJSz82316UH6I/J9uGarGYLZWoCrWvbFu3YqbfFOuWUA/SvW0/l4/ZwWfszsPtvHxvAasFmQnNwDL6rD/ar61wauC4sxnyGo6Or172n6sYi31Uf1XcJvn5wylJIBk0Aq7/XqQMBrtDn1udqwzWYMq8uF9/VX4DVYWuSV7y/IUbIcpDsm1sYxwxg6nLpc+RLy2F0smz7oTRPm/Xi0Dr4PiE/zCI/ljqd/5ilNE2afYG3BbGJzzcPxrInh5r13I3It8X/3FwpBswTZ5eWyjZsgW+LEE4MNL74YzFNiV8qjQrNAXBP3HWJaVMHAL1uvUTY42P1Odlc0WnKhwZTdnlfTD3G5ejb/ffF+5tyrnXuXcfvc+opQUyVw+k4z0AJKd/5d4mR0GfAHXDYuMrH1EOdW5fID2GfV+o7LhX3kpV2JGVy7Bsrfn375Qq9ScnLkS8tZ1OFqFf3XslFrQK/rph86dr8a5WQyK5K8MeutAOwOhwDoBTwy8u9VzPp1X5qrBH45tWstT83V5YxT5o1jr4R/HjdeCoOXXoMtqj2uemU+AegXXq7LtxGqojnXHzxIMgW7b7BBfUqQbIPdvRxPS7hb+fj97heADqcB7eD+085+lR6KC8G12Rfg8/tj+lDjOD3DZpywfmf4ayy4zInJP1IIZegjXHmQ/VWKeS5ZSTlDFW783WLeo3Dp3HF4AO0UzsOrYOvhGnLjHEsrnzp2M3DTv78fBnaYwt6I/rtwQEW+Fjch1x93H/83iX6KZFOiX/XvICQeHZNJsTvqe0QrvJ22ERKPbgM57xTFzLfk2x9Dr7rfWi9e584DcXj+8Q0Pq5QPlUGYLW4cMX9c+6wcD4XTp+oMvhc2J9RrGCx/98Mvv9XQ+z/T6yoShFjnH1DAjZ2Mi+nfRX2SlPwrQrYRgYq8KUx+Hg7tSzuw1rEF8aUu42UOrG4srfthzEB9OLojVg3f2ZirZ1mhL4t7O1X6g+g/+FQWKBwnEj83vUALLucVLRzhIpUhHDdVyyYTb32k3PxsVIimwtHiFP14VV3OK69771pi2oDO/yufWJcfdyXGMy+LsHveyqv7y4F1YZrm9M/12Ae57tWbHJ951
2DaYDw/1jKQDmXkxojRrkCsykiPeY3gUsOMV+lYFfHfTC0WR8OVODnCtGx04yox9uSupS85Bws+Fw8g1nRxQh+I9hNWbNtytqC3gh3SuBTcflUyA5HtPhEQIzr7hIYPuGRIgi48fIU1ERIDDU4kMIR5bHr37v6g8uZNgz4QVdYKNv7uRx6n3MvcfW5Awh8hwKgf2WhpuIT951O73Og/hddg2+usI9173M6xjFis04hXoXrXreor8uBj/n90zAcZShCdHTt+zy0LdbMJZJtV99sj4+vDrkB6Al9W8T7wnKw0Dd59qtB6qaGnD9KdBg4E/xy/Ni7BjLUqkOu89Hp9MoZR59au5zTZ4mrzRHonJVyfA61ayDhA4fEUHMEcuM6B1Q6HqjYjr7PyTfEnBNpH2NxhcOZNO78Ed97gPh4dgk5zkkVYroJQh4gTcwPMpSmrjtACk2boz6GQuAreaDumrQlRh8gLAqNuDBiEru1lKAHWB2Ow3XvU8IluALdd6zS+jFU/+1jpkIlAPyTWSmkMfoAYWEv2Rc7+gBuV9/s5xosSMXn8jIdEkOJEYnYldwFCIUHUXckKJGP+8q9k+UbLEnTfW48fnWte2/2wfNhcPs+sQ8QHoTjNnOTWqd0/6oEeuxxtFXQG+r+/BQ3voiSJtMYgR8TVy/FFZPvWptfaafQp9JtYYPdfYCwKx96teuw67GJFfySH+rct/jxcVADGDwoosQ+gF/s23nSNd1tpMKBcvcBaKFv+skJz6G2U6BEv6svPheeSg+l+UKbAFY7+aGwKImLz0ESDmJecXiO7eDbzr2dR5XH7ccum1mlGMtVd0w9dQh5gPaLeYOK8ubRZj3YGIFf11KXGs6zduCEZOCLNBbsVMw5J+7eJWByChuK2AuEr1+uQQw+Z9TdD1yGI/jtpTVjnnqbgk/oA/DFfh24zo3k7oxLyLu27fe+ux0Skc+9G8Nx0QHo50LYZWxxT/0ZfHW46kxdbrYqctcfW9+gRLyhCjE/yN8AHRBUT5tN38YIfKV5tPmLbeMTMa6yLhcTwL8qCi7LSa8TSThESOABrA53sh187ObbYFGIsdNw+A6emOsSiCkXv5DQN/2y++oSxr60FEc9ZbUbqbA321w3H39nOE5+6ueInXeqDvPqE3p4gGDXIRX2VN8ldxzqJGebgxbyAMMn5pXBgVd8bIsuGgqBzwnv0WUy5fjuorQhZIeCG9LgE0SuH3mfgHct2Yepc2WS0IAjFIMPsDrcCS876ZrX4HP2DeZc4CeTpoTwpIJXxAEIu/pcIV8F+FxxBg+ubd977ObjkJ1QHWab6iPVd8pJdz1Dwv5zPfXZl4/rA+A/TyJG4DdBRFbVhyaLd0MTzn+INvRxWGhzlEcrBb5LXOJ0e9snSNs6Ohskrtj8tjw8y4YSPa68UD0hhzzVxc8pbkOTYEPHQ+WZdFvou1xb290NOeSmn5RAwHdUDLGfIxfbuTZQdyU4Tj4u60uT7Bv7EC5piA1286UhO/i4fLjEvZ2P/+xj54p7XB+G89yJ0LHl+F42SexV8WCoqmPkm3T+FCUnrRT4OQV4m0dng8Q1CXqQ4j7XhOCQQ++7IPgGC6F9Xe3F7s+tk4ISzdRqMvg95dYbwW8GFK4yALRI5Q5qXOVca+3ngBL5AHSYDADPsea6+5JyvqfV+lYJkrbnis3PJaKwiHY9NK1K5z7nw65yisuqn7qagyavG990hvGY2kCbF2JppcBXmsEgv+ShB5hVQUj4S/bJsW/uOinn1+BbTYYj/GxnHwshytG366bcfQkuhz8XLpFv8C0lCeAeBIbSfeU4zr7P1fcNLl0OPE7jrrRjkHxv7W38ACqfc0+92uIet2GnxYh76v+C83/dBrFOUfeKNE0Tvdz+SAyHmGNs2nlpO20S9TatFPhVuMQaoiMjtC4sDnty1ZHzs6Qe1FXHHQXpj2nVoSMugcbZD4An7gx4pSFcDrv0Jmbfjr2396HuBJhX6rgkoj9UNk
accNv3TSh2iVxuOucOABXv7nrCLncpUOk8AjxRGp8LTr2u2HeX8x7j3FN1mDZdwl7q3nMFfJOWcKySpgrSqvpV5fE29Vy2lTZHebRS4Nct4JRm4/uHa+KcAK6LXlUboXYocYjzbJcWwB137QtNwBNsjaDinoNQ6IkUat+c9QO43XwD1z33leek4W2uox/z3vQ7tOQoPlaD64FRWIzb+0vFva8Ou027nNS9D4n6YRTzbRGbbemnokhopcC3yT2aatPorG2kxOtT+3I/qzYNCHNdaEIiWXIr2RVm4BOMtrNPufr4lROvj18p0esbIMSI8ypW7OGIfAC+0Kecfk6ay9GnhH6sa08Rc4fJJ6Kx0HfF03ePXe3McbrEP67HbpMaVEhFPVfIq+jko+eqhHse9HzxafPKi60X+EozcYXwuGLnqdV4cHrIjef805k62vSE3hh8IRax9fgmDPvi5jkTLV0O8NISvca+SyS6wlMA/MKq7qU3QyIfICz0uelSR58r9PF+1CCAM7jgQIloSnT7Qm7sfaTi3iXqXeJeKuxVcK1Gz4kMPV/V0RZBj4kW+HXNLB6UGKPW0FfyQJ1Pl8teRfybb8Ax7HAuAq5BALWvayUYex/K2cdiEYt2SjRSa+zjV58opvptGIT454h8APcxhdLtPJ/z7yrDEfoYLN65Aw4K3IZL5LvWv8fbOCTHOPq4jDn2GNfeFvau71SsEFMBp7jQ70Z1rMkY/LoOOjSZ08a1dCO3HSzo2/RBDhN1n/dheYhXLlwOuK8sZxIoFQZiynMceuzsU0/QtdsMiVvXcUnEv03MQAA7vTGhO6G7LD5RT6Vxhb7pr09c+EK8qP7aea407NbbaS5XPmUyLUfcu+YJhI4rpozSPCSfW655VjFtx5Rf67QpJAczVCE6ksFAaH8Obf7gFTdrzdUPwRH8rjIcZ99expIKx7Hrc4lQU4drJR/cpgtf+EhoX9dAQCL8favtGDjiOCVG31VGsnSq3TZngMjJw+64S4T7hD1VxnUXwO6D6zUk7CXHpwyOOj+LUFsxd7eUavE9DLXJOnCoBH6Kg4/hhOg09UNV0mnTxNy6kQp+joA0hMS+T7zjvlAhPS6321cPPibOBdguw7kD4HoiLsa32ozdLnfAxYmHl8TMc1YiAug/Dt/qMtTqNfZ7SribOmOEPVUn9Yr7zRX2KcKsaaIuhxNN3Wlb61R5HvQcx+Ezj5usA4cyBj+HIFNRpxjU0ffjCxGx86UTLl1i39UH3+RbvF6/vQ9+T21z+utyzX3lDFz3PxTS4/osJOmhz6cOQUbdCeC8Uktfhtx9qpzv1ewLwBP2kvPUJgGWq69tOmZl7dFkAR9iqGLwMdTgIGbCrm+FlzZ/+AofdfR5+MS+1D12hfHYdVEuPJVmHrCF830PX8L9d/U35JpjQgMiXDZUxhUCRD1MylcnlR4K+QnlxeAS9/Z7HCITEvV4H85Dq6oW9ipslTrR71scukxmQwkNDijRxnFrqfXYdcUdRelH6uxTaa4wHt/Smz5nPpTvE/wcsc0l5IJzRDOeBGuwj8Hl+EtceUmITgo+YW9vh55m60rLNYnWFTbkS5PkK4oyGGLMuyYL/laF6ODt1Jh7ah9OPaHBgjLc1LHqDjVobPOdA4nrK3H2DTgExzW4CL03Sye6ypm2bFxOuW8wEMrjhPhIxLivPfvcYUKr5HDqp/DV6Zu4KnXdXTH2nFe7LyHHPvR9rYLUenOv5qI0Ex1Q8nFpyTUXgz8IXAKnzcJHUQDo23+hH5I2f+8p9x7n2fm+uHC8wostwHM8hRUPKFwDDGq/3ITmAEjbpFbHseGuAITnN2B8E2lxHTaUm26/9wl7u06fY0/t7+tzneK+SnEWW7cODNqDivu1Tatj8Dnr4odi8GPcf0n5Jo/ulOYh/b603d0Pxa1z3H1KgFN1YbEaaluyTTn/vn1dfQJw3xEI1VVFXrcbLhODRBSHRD4VPsMJuwmJel8oTk5R3zYR1r
b+No06Bkj6GcVBhV/73lPbTaJVDn4VtFUYKQr1RN5hEfzSCao+15+KVQ+t4Z7L7U/B5Z7HXrx9feP0OzSngtO+NN8l7kNCPJewp/rlOo5hF/ZKHqQhdDnqV9YmlcTgh0Y43Lh6vG2LFq7zjsvkFj5NHbkp9RGzwk4V3xvf3a6cz4ioA85FkBuDTg0aQgMJbsiNaylNFxKHP3RXIJTnW5Ped4eAU3cqkhh8gPB6+Hg75PrjduoQ9Sq8FBf63Wge3Gt0kzVgNgdfEkrj2/ZNYMUuJc4fhIhxLaGprB1SvmtNvr3XNDgOsq9MKLbfTg89tRWAPwGVQ6y7nurKA/iX2aTgDA5sOPH3vva4ITu+PF9sfcyqOCrsFUUBaPY1PFsMflVIBgOckIWqBwMan69wGISLjgfK+H2biHX4TbmQI8tZDx6D1+rH+AYE3JVzpHmcMpw7BVUidcdD7jre9gl6zv7cPEkZRVGajW+VRbw8Os6PjWbJTeti8KWinTqhuL46UDGvDBLO0q7DEr9v4Lj8rrKuulLEm29FmtDE2pBTb8hVBpND/HPPnVRgSwU9tx5OfyRlFEVRbKrWhWyBnxpHnzPmPiTSByXibVTQKyEG+R2RhNS1dfKuZDKbz1n35Uv7wxXKvvh5GzwoCB2Hr0yOfbjkEPuhWP1QPerUK4oiAUeGUDoVl/dtp8C5FrMFfkocfa4wm9iwm0GEIYTa0wFAe8n1XfL9MCjVIF3BQuJq5xR8MavxhO4QYEKx9IMQsLHr5RtiRbsKekVRfFBLaA4yRIezX+NDdHINDnCZQTuROgCIw+Um+wZxkgHfoAeHTZ6wM6xwXG/Jfr79Je55nYKS+0CrKvAJdopUMa4TZBVFSQULfld+aDBgb+eOz68kRCdlX98IqIpwnkGhIi6Oqgdxg/6+NPF7EVrlykdT/t9i4Ii7WPc/VF/sGvy4nph9m0bV68vXdexN/N+umzb/HiiKTcgMzG0WhuaTUmRx8EO3KST7UnnSibJN/BHRH/f6aOP3Y1jDdahjaeL5jyX3xFSJ2Mw9uGgLscdW5TkZpv/ZKsl1nobpN0RpPzm/1zl/S7LE4EsnFkjCbHz1NDkGH5PS/jBfPHJ8LoP+bHMjPZ62fT+aEiZXNbkFZZUx/02i6uNq2/+Lspph+gyH/XdwWPGF6Pi2c4n4oYjB9+Fb+s8XvtG2fyiqv239gWvbuW8DbXX/2/i/OEhShG+da9wPeuDRpv8BRUmNdMiN/bscMk+VZjNSFP61FNatWwc7duyoqz+KoiiKoiiKonh45JFHYG5uzpkfFPiKoiiKoiiKorSHIY3iVBRFURRFUZS1iQp8RVEURVEURRkiVOAriqIoiqIoyhChAl9RFEVRFEVRhggV+IqiKIqiKIoyRKjAVxRFURRFUZQh4v8D2J6x/z753soAAAAASUVORK5CYII=", "text/plain": [ "
" ] @@ -299,7 +299,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -311,7 +311,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "19:50:37 C02YR4ANLVCJ SmartSim[54122] INFO fv_simulation(54161): Completed\n" + "20:37:31 HPE-C02YR4ANLVCJ SmartSim[25938:JobManager] INFO fv_simulation(26039): SmartSimStatus.STATUS_COMPLETED\n" ] } ], @@ -335,7 +335,7 @@ "\n", "# Use the Experiment API to wait until the model is finished\n", "while not exp.finished(model):\n", - " time.sleep(5)" + " time.sleep(5)\n" ] }, { @@ -378,6 +378,7 @@ }, { "cell_type": "code", + "id": "6f3ed63d-e324-443d-9b68-b2cf618d31c7", "execution_count": 7, "metadata": {}, "outputs": [ @@ -385,10 +386,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Default@19-50-39:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n", - "Default@19-50-40:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n", - "Default@19-50-41:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n", - "Default@19-50-43:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" + "Default@20-37-33:ERROR: Redis IO error when executing command: Failed to get reply: Resource temporarily unavailable\n" ] } ], @@ -397,11 +395,12 @@ "\n", "probe_x, probe_y = np.meshgrid(range(20, 400, 20), range(20, 100, 20))\n", "client.put_tensor(\"probe_x\", probe_x)\n", - "client.put_tensor(\"probe_y\", probe_y)" + "client.put_tensor(\"probe_y\", probe_y)\n" ] }, { "cell_type": "markdown", + "id": "96c154fe-5ca8-4d89-91f8-8fd4e75cb80e", "metadata": {}, "source": [ "We then apply the function `probe_points` to the `ux` and `uy` tensors computed in the last time step of the previous simulation. Note that all tensors are already on the DB, thus we can reference them by name. Finally, we download and plot the output (a 2D velocity field), which is stored as `probe_u` on the DB." 
@@ -409,12 +408,13 @@ }, { "cell_type": "code", + "id": "36e3b415-dcc1-4d25-9cce-52388146a4bb", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -430,11 +430,12 @@ "client.run_script(\"probe\", \"probe_points\", inputs=[ux_name, uy_name , \"probe_x\", \"probe_y\", \"cylinder\"], outputs=[\"probe_u\"])\n", "\n", "probe_u = client.get_tensor(\"probe_u\")\n", - "plot_lattice_probes(time_steps-1, probe_x, probe_y, probe_u)" + "plot_lattice_probes(time_steps-1, probe_x, probe_y, probe_u)\n" ] }, { "cell_type": "markdown", + "id": "9d7e4966-a0de-480c-9556-936197a5a5d2", "metadata": {}, "source": [ "### Uploading a function inline\n", @@ -451,11 +452,12 @@ "import torch\n", "\n", "def compute_norm(ux: torch.Tensor, uy: torch.Tensor):\n", - " return torch.sqrt(ux*ux + uy*uy)" + " return torch.sqrt(ux*ux + uy*uy)\n" ] }, { "cell_type": "markdown", + "id": "1c4daf43-34d0-482a-b9b5-b3b6f1e173c4", "metadata": {}, "source": [ "We then store the function on the DB under the key `norm_function`." @@ -468,11 +470,12 @@ "metadata": {}, "outputs": [], "source": [ - "client.set_function(\"norm_function\", compute_norm)" + "client.set_function(\"norm_function\", compute_norm)\n" ] }, { "cell_type": "markdown", + "id": "19409ac6-e118-44db-a847-2d905fdf0331", "metadata": {}, "source": [ "Note that the key we used identifies a functional unit containing the function itself: this is similar to the key used to store the `probe` script above. When we want to run the function, we just call it with `run_script`, by indicating the `script` key as `\"norm_function\"` and the name of the function itself as `\"compute_norm\"`." @@ -486,7 +489,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -500,7 +503,7 @@ "client.run_script(\"norm_function\", \"compute_norm\", [f\"{{data_{i}}}.uy\", f\"{{data_{i}}}.ux\"], [\"u\"])\n", "u = client.get_tensor(\"u\")\n", "\n", - "plot_lattice_norm(time_steps-1, u, cylinder)" + "plot_lattice_norm(time_steps-1, u, cylinder)\n" ] }, { @@ -511,7 +514,7 @@ "outputs": [], "source": [ "# Optionally clear the database\n", - "client.flush_db(db.get_address())" + "client.flush_db(db.get_address())\n" ] }, { @@ -537,16 +540,16 @@ "text/html": [ "\n", "\n", - "\n", + "\n", "\n", "\n", - "\n", - "\n", + "\n", + "\n", "\n", "
Name Entity-Type JobID RunID Time Status Returncode
Name Entity-Type JobID RunID Time Status Returncode
0 fv_simulation Model 54161 0 38.1561Completed0
1 orchestrator_0DBNode 54134 0 66.5750Cancelled0
0 fv_simulation Model 26039 0 59.2839SmartSimStatus.STATUS_COMPLETED0
1 orchestrator_0DBNode 25963 0 75.2015SmartSimStatus.STATUS_CANCELLED0
" ], "text/plain": [ - "'\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 fv_simulation Model 54161 0 38.1561Completed0
1 orchestrator_0DBNode 54134 0 66.5750Cancelled0
'" + "'\\n\\n\\n\\n\\n\\n\\n\\n
Name Entity-Type JobID RunID Time Status Returncode
0 fv_simulation Model 26039 0 59.2839SmartSimStatus.STATUS_COMPLETED0
1 orchestrator_0DBNode 25963 0 75.2015SmartSimStatus.STATUS_CANCELLED0
'" ] }, "execution_count": 14, @@ -555,7 +558,7 @@ } ], "source": [ - "exp.summary(style=\"html\")" + "exp.summary(style=\"html\")\n" ] } ], @@ -575,7 +578,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.10" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/doc/tutorials/online_analysis/lattice/vishelpers.py b/doc/tutorials/online_analysis/lattice/vishelpers.py index 725c690fd..782692fac 100644 --- a/doc/tutorials/online_analysis/lattice/vishelpers.py +++ b/doc/tutorials/online_analysis/lattice/vishelpers.py @@ -11,7 +11,7 @@ def plot_lattice_vorticity(timestep, ux, uy, cylinder): np.roll(uy, -1, axis=1) - np.roll(uy, 1, axis=1) ) vorticity[cylinder] = np.nan - cmap = plt.cm.get_cmap("bwr").copy() + cmap = plt.get_cmap("bwr").copy() cmap.set_bad(color="black") plt.imshow(vorticity, cmap=cmap) plt.clim(-0.1, 0.1) @@ -30,7 +30,7 @@ def plot_lattice_norm(timestep, u, cylinder): plt.cla() u[cylinder] = np.nan - cmap = plt.cm.get_cmap("jet").copy() + cmap = plt.get_cmap("jet").copy() cmap.set_bad(color="black") plt.contour(u, cmap=cmap) plt.clim(-0.1, 0.1) @@ -47,7 +47,7 @@ def plot_lattice_probes(timestep, probe_x, probe_y, probe_u): fig = plt.figure(figsize=(12, 6), dpi=80) plt.cla() - cmap = plt.cm.get_cmap("binary").copy() + cmap = plt.get_cmap("binary").copy() cmap.set_bad(color="black") plt.quiver( probe_x, diff --git a/docker-compose.yml b/docker-compose.yml index f5be4e338..e65259162 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,3 @@ - - -version: '3' - services: docs-dev: image: smartsim-docs:dev-latest @@ -18,9 +14,9 @@ services: - "8888:8888" tutorials-prod: - image: smartsim-tutorials:v0.7.0 + image: smartsim-tutorials:v0.8.0 build: context: . 
dockerfile: ./docker/prod/Dockerfile ports: - - "8888:8888" \ No newline at end of file + - "8888:8888" diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index 3ab3a37f8..faeeae8f3 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -36,9 +36,9 @@ RUN useradd --system --create-home --shell /bin/bash -g root -G sudo craylabs && apt-get update \ && apt-get install --no-install-recommends -y build-essential \ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ - python3-pip python3.9 python3.9-dev cmake \ + python3-pip python3 python3-dev cmake \ && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/python3.9 /usr/bin/python + && ln -s /usr/bin/python3 /usr/bin/python WORKDIR /home/craylabs RUN git clone https://github.com/CrayLabs/SmartRedis.git --branch develop --depth=1 smartredis \ @@ -50,11 +50,11 @@ COPY . /home/craylabs/SmartSim RUN chown craylabs:root -R SmartSim USER craylabs -RUN cd SmartSim && SMARTSIM_SUFFIX=dev python -m pip install .[ml] +RUN cd SmartSim && SMARTSIM_SUFFIX=dev python -m pip install . RUN export PATH=/home/craylabs/.local/bin:$PATH && \ echo "export PATH=/home/craylabs/.local/bin:$PATH" >> /home/craylabs/.bashrc && \ - python -m pip install jupyter jupyterlab matplotlib && \ + python -m pip install jupyter jupyterlab "ipython<8" matplotlib && \ smart clobber && \ smart build --device cpu -v && \ chown craylabs:root -R /home/craylabs/.local && \ diff --git a/docker/docs/dev/Dockerfile b/docker/docs/dev/Dockerfile index e9db9c342..dbac524bc 100644 --- a/docker/docs/dev/Dockerfile +++ b/docker/docs/dev/Dockerfile @@ -55,8 +55,7 @@ RUN git clone https://github.com/CrayLabs/SmartDashboard.git --branch develop -- && rm -rf ~/.cache/pip # Install docs dependencies and SmartSim -RUN python -m pip install -r doc/requirements-doc.txt \ - && NO_CHECKS=1 SMARTSIM_SUFFIX=dev python -m pip install . +RUN NO_CHECKS=1 SMARTSIM_SUFFIX=dev python -m pip install .[docs] # Note this is needed to ensure that the Sphinx builds. 
Can be removed with newer Tensorflow RUN python -m pip install typing_extensions==4.6.1 diff --git a/docker/prod-cuda11/Dockerfile b/docker/prod-cuda11/Dockerfile new file mode 100644 index 000000000..fc2747905 --- /dev/null +++ b/docker/prod-cuda11/Dockerfile @@ -0,0 +1,61 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +FROM ubuntu:22.04 + +LABEL maintainer="Cray Labs" +LABEL org.opencontainers.image.source https://github.com/CrayLabs/SmartSim + +ARG DEBIAN_FRONTEND="noninteractive" +ENV TZ=US/Seattle + +# Make basic dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y build-essential \ + git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ + python3-pip python3 python3-dev cmake wget apt-utils + +# # Install Cudatoolkit 11.8 +ENV TERM="xterm" +RUN wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \ + chmod +x ./cuda_11.8.0_520.61.05_linux.run && \ + ./cuda_11.8.0_520.61.05_linux.run --silent --toolkit && \ + rm ./cuda_11.8.0_520.61.05_linux.run + +# Install cuDNN 8.9.7 +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb && \ + dpkg -i libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb && \ + rm ./libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb + + # Install SmartSim and SmartRedis + RUN pip install git+https://github.com/CrayLabs/SmartRedis.git && \ + pip install "smartsim @ git+https://github.com/CrayLabs/SmartSim.git" + + ENV CUDA_HOME="/usr/local/cuda/" + ENV PATH="${PATH}:${CUDA_HOME}/bin" + + # Build ML Backends + RUN smart build --device=gpu --onnx diff --git a/docker/prod-cuda12/Dockerfile b/docker/prod-cuda12/Dockerfile new file mode 100644 index 000000000..bbdfd3513 --- /dev/null +++ b/docker/prod-cuda12/Dockerfile @@ -0,0 +1,64 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +FROM ubuntu:22.04 + +LABEL maintainer="Cray Labs" +LABEL org.opencontainers.image.source https://github.com/CrayLabs/SmartSim + +ARG DEBIAN_FRONTEND="noninteractive" +ENV TZ=US/Seattle + +# Make basic dependencies +RUN apt-get update \ + && apt-get install --no-install-recommends -y build-essential \ + git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ + python3-pip python3 python3-dev cmake wget + +# Install Cudatoolkit 12.5 +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \ + dpkg -i cuda-keyring_1.1-1_all.deb && \ + apt-get update -y && \ + apt-get install -y cuda-toolkit-12-5 + +# Install cuDNN 8.9.7 +RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb && \ + dpkg -i libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb + +# Install SmartSim and SmartRedis +RUN pip install git+https://github.com/CrayLabs/SmartRedis.git && \ + pip install 
git+https://github.com/CrayLabs/SmartSim.git@cuda-12-support + +ENV CUDA_HOME="/usr/local/cuda/" +ENV PATH="${PATH}:${CUDA_HOME}/bin" + +# Install machine-learning python packages consistent with RedisAI +# Note: pytorch gets installed in the smart build step +# This step will be deprecated in a future update +RUN pip install tensorflow==2.15.0 + +# Build ML Backends +RUN smart build --device=cuda121 diff --git a/docker/prod/Dockerfile b/docker/prod/Dockerfile index 325ace923..f8560f7bd 100644 --- a/docker/prod/Dockerfile +++ b/docker/prod/Dockerfile @@ -36,19 +36,21 @@ RUN useradd --system --create-home --shell /bin/bash -g root -G sudo craylabs && apt-get update \ && apt-get install --no-install-recommends -y build-essential \ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ - python3.9 python3.9-dev python3-pip cmake \ + python3-pip python3 python3-dev cmake \ && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/python3.9 /usr/bin/python + && ln -s /usr/bin/python3 /usr/bin/python WORKDIR /home/craylabs -COPY --chown=craylabs:root ./tutorials/ /home/craylabs/tutorials/ +COPY --chown=craylabs:root ./doc/tutorials/ /home/craylabs/tutorials/ USER craylabs RUN export PATH=/home/craylabs/.local/bin:$PATH && \ echo "export PATH=/home/craylabs/.local/bin:$PATH" >> /home/craylabs/.bashrc && \ - python -m pip install smartsim[ml]==0.7.0 jupyter jupyterlab matplotlib && \ + python -m pip install smartsim==0.8.0 jupyter jupyterlab "ipython<8" matplotlib && \ smart build --device cpu -v && \ chown craylabs:root -R /home/craylabs/.local && \ rm -rf ~/.cache/pip +WORKDIR /home/craylabs/tutorials/ + CMD ["/bin/bash", "-c", "PATH=/home/craylabs/.local/bin:$PATH /home/craylabs/.local/bin/jupyter lab --port 8888 --no-browser --ip=0.0.0.0"] diff --git a/pyproject.toml b/pyproject.toml index 91164a68b..62df92f0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ [build-system] -requires = ["setuptools", "wheel", "cmake>=3.13"] +requires = 
["packaging>=24.0", "setuptools>=70.0", "wheel", "cmake>=3.13"] build-backend = "setuptools.build_meta" [tool.black] diff --git a/setup.cfg b/setup.cfg index 742386d2c..1ea8d2518 100644 --- a/setup.cfg +++ b/setup.cfg @@ -51,9 +51,6 @@ classifiers = [options] packages = find: -setup_requires = - setuptools>=39.2 - cmake>=3.13 include_package_data = True python_requires = >=3.9,<3.12 diff --git a/setup.py b/setup.py index 6e46ddef9..571974d28 100644 --- a/setup.py +++ b/setup.py @@ -77,9 +77,6 @@ from pathlib import Path from setuptools import setup -from setuptools.command.build_py import build_py -from setuptools.command.install import install -from setuptools.dist import Distribution # Some necessary evils we have to do to be able to use # the _install tools in smartsim/smartsim/_core/_install @@ -95,12 +92,6 @@ buildenv = importlib.util.module_from_spec(buildenv_spec) buildenv_spec.loader.exec_module(buildenv) -# import builder module -builder_path = _install_dir.joinpath("builder.py") -builder_spec = importlib.util.spec_from_file_location("builder", str(builder_path)) -builder = importlib.util.module_from_spec(builder_spec) -builder_spec.loader.exec_module(builder) - # helper classes for building dependencies that are # also utilized by the Smart CLI build_env = buildenv.BuildEnv(checks=False) @@ -128,60 +119,7 @@ class BuildError(Exception): pass - -# Hacky workaround for solving CI build "purelib" issue -# see https://github.com/google/or-tools/issues/616 -class InstallPlatlib(install): - def finalize_options(self): - super().finalize_options() - if self.distribution.has_ext_modules(): - self.install_lib = self.install_platlib - - -class SmartSimBuild(build_py): - def run(self): - database_builder = builder.DatabaseBuilder( - build_env(), build_env.MALLOC, build_env.JOBS - ) - if not database_builder.is_built: - database_builder.build_from_git(versions.REDIS_URL, versions.REDIS) - - database_builder.cleanup() - - # run original build_py command - 
super().run() - - -# Tested with wheel v0.29.0 -class BinaryDistribution(Distribution): - """Distribution which always forces a binary package with platform name - - We use this because we want to pre-package Redis for certain - platforms to use. - """ - - def has_ext_modules(_placeholder): - return True - - # Define needed dependencies for the installation -deps = [ - "psutil>=5.7.2", - "coloredlogs>=10.0", - "tabulate>=0.8.9", - "redis>=4.5", - "tqdm>=4.50.2", - "filelock>=3.4.2", - "protobuf~=3.20", - "jinja2>=3.1.2", - "watchdog>=4.0.0", - "pydantic==1.10.14", - "pyzmq>=25.1.2", - "pygithub>=2.3.0", -] - -# Add SmartRedis at specific version -deps.append("smartredis>={}".format(versions.SMARTREDIS)) extras_require = { "dev": [ @@ -199,26 +137,54 @@ def has_ext_modules(_placeholder): "types-redis", "types-tabulate", "types-tqdm", - "types-tensorflow==2.12.0.9", + "types-tensorflow", "types-setuptools", "typing_extensions>=4.1.0", ], - # see smartsim/_core/_install/buildenv.py for more details - **versions.ml_extras_required(), + "docs": [ + "Sphinx==6.2.1", + "breathe==4.35.0", + "sphinx-fortran==1.1.1", + "sphinx-book-theme==1.0.1", + "sphinx-copybutton==0.5.2", + "sphinx-tabs==3.4.4", + "nbsphinx==0.9.3", + "docutils==0.18.1", + "torch==2.0.1", + "tensorflow>=2.14,<3.0", + "ipython", + "jinja2==3.1.2", + "sphinx-design", + "pypandoc", + "sphinx-autodoc-typehints", + "myst_parser", + ], } # rest in setup.cfg setup( version=smartsim_version, - install_requires=deps, - cmdclass={ - "build_py": SmartSimBuild, - "install": InstallPlatlib, - }, + install_requires=[ + "packaging>=24.0", + "psutil>=5.7.2", + "coloredlogs>=10.0", + "tabulate>=0.8.9", + "redis>=4.5", + "tqdm>=4.50.2", + "filelock>=3.4.2", + "GitPython<=3.1.43", + "protobuf<=3.20.3", + "jinja2>=3.1.2", + "watchdog>4,<5", + "pydantic>2", + "pyzmq>=25.1.2", + "pygithub>=2.3.0", + "numpy<2", + "smartredis>=0.6,<0.7", + ], zip_safe=False, extras_require=extras_require, - distclass=BinaryDistribution, 
entry_points={ "console_scripts": [ "smart = smartsim._core._cli.__main__:main", diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py index 951521f17..5d094b72f 100644 --- a/smartsim/_core/_cli/build.py +++ b/smartsim/_core/_cli/build.py @@ -25,26 +25,34 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import argparse +import importlib.metadata +import operator import os -import platform -import sys +import re +import shutil +import textwrap import typing as t from pathlib import Path from tabulate import tabulate from smartsim._core._cli.scripts.dragon_install import install_dragon -from smartsim._core._cli.utils import SMART_LOGGER_FORMAT, color_bool, pip +from smartsim._core._cli.utils import SMART_LOGGER_FORMAT from smartsim._core._install import builder -from smartsim._core._install.buildenv import ( - BuildEnv, - DbEngine, - SetupError, - Version_, - VersionConflictError, - Versioner, +from smartsim._core._install.buildenv import BuildEnv, DbEngine, Version_, Versioner +from smartsim._core._install.mlpackages import ( + DEFAULT_MLPACKAGE_PATH, + DEFAULT_MLPACKAGES, + MLPackageCollection, + load_platform_configs, ) -from smartsim._core._install.builder import BuildError, Device +from smartsim._core._install.platform import ( + Architecture, + Device, + OperatingSystem, + Platform, +) +from smartsim._core._install.redisaiBuilder import RedisAIBuilder from smartsim._core.config import CONFIG from smartsim._core.utils.helpers import installed_redisai_backends from smartsim.error import SSConfigError @@ -55,25 +63,6 @@ # NOTE: all smartsim modules need full paths as the smart cli # may be installed into a different directory. 
-_TPinningStr = t.Literal["==", "!=", ">=", ">", "<=", "<", "~="] - - -def check_py_onnx_version(versions: Versioner) -> None: - """Check Python environment for ONNX installation""" - _check_packages_in_python_env( - { - "onnx": Version_(versions.ONNX), - "skl2onnx": Version_(versions.REDISAI.skl2onnx), - "onnxmltools": Version_(versions.REDISAI.onnxmltools), - "scikit-learn": Version_(getattr(versions.REDISAI, "scikit-learn")), - }, - ) - - -def check_py_tf_version(versions: Versioner) -> None: - """Check Python environment for TensorFlow installation""" - _check_packages_in_python_env({"tensorflow": Version_(versions.TENSORFLOW)}) - def check_backends_install() -> bool: """Checks if backends have already been installed. @@ -115,8 +104,6 @@ def build_database( database_builder = builder.DatabaseBuilder( build_env(), jobs=build_env.JOBS, - _os=builder.OperatingSystem.from_str(platform.system()), - architecture=builder.Architecture.from_str(platform.machine()), malloc=build_env.MALLOC, verbose=verbose, ) @@ -125,220 +112,92 @@ def build_database( f"Building {database_name} version {versions.REDIS} " f"from {versions.REDIS_URL}" ) - database_builder.build_from_git(versions.REDIS_URL, versions.REDIS_BRANCH) + database_builder.build_from_git( + versions.REDIS_URL, branch=versions.REDIS_BRANCH + ) database_builder.cleanup() - logger.info(f"{database_name} build complete!") + logger.info(f"{database_name} build complete!") + else: + logger.warning( + f"{database_name} was previously built, run 'smart clobber' to rebuild" + ) def build_redis_ai( + platform: Platform, + mlpackages: MLPackageCollection, build_env: BuildEnv, - versions: Versioner, - device: Device, - use_torch: bool = True, - use_tf: bool = True, - use_onnx: bool = False, - torch_dir: t.Union[str, Path, None] = None, - libtf_dir: t.Union[str, Path, None] = None, - verbose: bool = False, - torch_with_mkl: bool = True, + verbose: bool, ) -> None: - # make sure user isn't trying to do something silly on MacOS - 
if build_env.PLATFORM == "darwin" and device == Device.GPU: - raise BuildError("SmartSim does not support GPU on MacOS") - - # decide which runtimes to build - print("\nML Backends Requested") - backends_table = [ - ["PyTorch", versions.TORCH, color_bool(use_torch)], - ["TensorFlow", versions.TENSORFLOW, color_bool(use_tf)], - ["ONNX", versions.ONNX, color_bool(use_onnx)], - ] - print(tabulate(backends_table, tablefmt="fancy_outline"), end="\n\n") - print(f"Building for GPU support: {color_bool(device == Device.GPU)}\n") - - if not check_backends_install(): - sys.exit(1) - - # TORCH - if use_torch and torch_dir: - torch_dir = Path(torch_dir).resolve() - if not torch_dir.is_dir(): - raise SetupError( - f"Could not find requested user Torch installation: {torch_dir}" - ) - - # TF - if use_tf and libtf_dir: - libtf_dir = Path(libtf_dir).resolve() - if not libtf_dir.is_dir(): - raise SetupError( - f"Could not find requested user TF installation: {libtf_dir}" - ) - - build_env_dict = build_env() - - rai_builder = builder.RedisAIBuilder( - build_env=build_env_dict, - jobs=build_env.JOBS, - _os=builder.OperatingSystem.from_str(platform.system()), - architecture=builder.Architecture.from_str(platform.machine()), - torch_dir=str(torch_dir) if torch_dir else "", - libtf_dir=str(libtf_dir) if libtf_dir else "", - build_torch=use_torch, - build_tf=use_tf, - build_onnx=use_onnx, - verbose=verbose, - torch_with_mkl=torch_with_mkl, + logger.info("Building RedisAI and backends...") + rai_builder = RedisAIBuilder( + platform, mlpackages, build_env, CONFIG.build_path, verbose ) - - if rai_builder.is_built: - logger.info("RedisAI installed. 
Run `smart clean` to remove.") - else: - # get the build environment, update with CUDNN env vars - # if present and building for GPU, otherwise warn the user - if device == Device.GPU: - gpu_env = build_env.get_cudnn_env() - cudnn_env_vars = [ - "CUDNN_LIBRARY", - "CUDNN_INCLUDE_DIR", - "CUDNN_INCLUDE_PATH", - "CUDNN_LIBRARY_PATH", - ] - if not gpu_env: - logger.warning( - "CUDNN environment variables not found.\n" - f"Looked for {cudnn_env_vars}" - ) - else: - build_env_dict.update(gpu_env) - # update RAI build env with cudnn env vars - rai_builder.env = build_env_dict - - logger.info( - f"Building RedisAI version {versions.REDISAI}" - f" from {versions.REDISAI_URL}" - ) - - # NOTE: have the option to add other builds here in the future - # like "from_tarball" - rai_builder.build_from_git( - versions.REDISAI_URL, versions.REDISAI_BRANCH, device - ) - logger.info("ML Backends and RedisAI build complete!") - - -def check_py_torch_version(versions: Versioner, device: Device = Device.CPU) -> None: - """Check Python environment for TensorFlow installation""" - if BuildEnv.is_macos(): - if device == Device.GPU: - raise BuildError("SmartSim does not support GPU on MacOS") - device_suffix = "" - else: # linux - if device == Device.CPU: - device_suffix = versions.TORCH_CPU_SUFFIX - elif device == Device.GPU: - device_suffix = versions.TORCH_CUDA_SUFFIX - else: - raise BuildError("Unrecognized device requested") - - torch_deps = { - "torch": Version_(f"{versions.TORCH}{device_suffix}"), - "torchvision": Version_(f"{versions.TORCHVISION}{device_suffix}"), + rai_builder.build() + rai_builder.cleanup_build() + + +def parse_requirement( + requirement: str, +) -> t.Tuple[str, t.Optional[str], t.Callable[[Version_], bool]]: + operators = { + "==": operator.eq, + "<=": operator.le, + ">=": operator.ge, + "<": operator.lt, + ">": operator.gt, } - missing, conflicts = _assess_python_env( - torch_deps, - package_pinning="==", - 
validate_installed_version=_create_torch_version_validator( - with_suffix=device_suffix - ), + semantic_version_pattern = r"\d+(?:\.\d+(?:\.\d+)?)?([^\s]*)" + pattern = ( + r"^" # Start + r"([a-zA-Z0-9_\-]+)" # Package name + r"(?:\[[a-zA-Z0-9_\-,]+\])?" # Any extras + r"(?:([<>=!~]{1,2})" # Pinning string + rf"({semantic_version_pattern}))?" # A version number + r"$" # End ) + match = re.match(pattern, requirement) + if match is None: + raise ValueError(f"Invalid requirement string: {requirement}") + module_name, cmp_op, version_str, suffix = match.groups() + version = Version_(version_str) if version_str is not None else None + if cmp_op is None: + is_compatible = lambda _: True # pylint: disable=unnecessary-lambda-assignment + elif (cmp := operators.get(cmp_op, None)) is None: + raise ValueError(f"Unrecognized comparison operator: {cmp_op}") + else: - if len(missing) == len(torch_deps) and not conflicts: - # All PyTorch deps are not installed and there are no conflicting - # python packages. We can try to install torch deps into the current env. - logger.info( - "Torch version not found in python environment. 
" - "Attempting to install via `pip`" - ) - wheel_device = ( - device.value if device == Device.CPU else device_suffix.replace("+", "") - ) - pip( - "install", - "--extra-index-url", - f"https://download.pytorch.org/whl/{wheel_device}", - *(f"{package}=={version}" for package, version in torch_deps.items()), - ) - elif missing or conflicts: - logger.warning(_format_incompatible_python_env_message(missing, conflicts)) - - -def _create_torch_version_validator( - with_suffix: str, -) -> t.Callable[[str, t.Optional[Version_]], bool]: - def check_torch_version(package: str, version: t.Optional[Version_]) -> bool: - if not BuildEnv.check_installed(package, version): - return False - # Default check only looks at major/minor version numbers, - # Torch requires we look at the patch as well - installed = BuildEnv.get_py_package_version(package) - if with_suffix and with_suffix not in installed.patch: - raise VersionConflictError( - package, - installed, - version or Version_(f"X.X.X{with_suffix}"), - msg=( - f"{package}=={installed} does not satisfy device " - f"suffix requirement: {with_suffix}" - ), + def is_compatible(other: Version_) -> bool: + assert version is not None # For type check, always should be true + match_ = re.match(rf"^{semantic_version_pattern}$", other) + return ( + cmp(other, version) and match_ is not None and match_.group(1) == suffix ) - return True - return check_torch_version + return module_name, f"{cmp_op}{version}" if version else None, is_compatible -def _check_packages_in_python_env( - packages: t.Mapping[str, t.Optional[Version_]], - package_pinning: _TPinningStr = "==", - validate_installed_version: t.Optional[ - t.Callable[[str, t.Optional[Version_]], bool] - ] = None, -) -> None: - # TODO: Do not like how the default validation function will always look for - # a `==` pinning. Maybe turn `BuildEnv.check_installed` into a factory - # that takes a pinning and returns an appropriate validation fn? 
- validate_installed_version = validate_installed_version or BuildEnv.check_installed - missing, conflicts = _assess_python_env( - packages, - package_pinning, - validate_installed_version, - ) +def check_ml_python_packages(packages: MLPackageCollection) -> None: + missing = [] + conflicts = [] + + for package in packages.values(): + for requirement in package.python_packages: + module_name, version_spec, is_compatible = parse_requirement(requirement) + try: + installed = BuildEnv.get_py_package_version(module_name) + if not is_compatible(installed): + conflicts.append( + f"{module_name}: {installed} is installed, " + f"but {version_spec or 'Any'} is required" + ) + except importlib.metadata.PackageNotFoundError: + missing.append(module_name) if missing or conflicts: logger.warning(_format_incompatible_python_env_message(missing, conflicts)) -def _assess_python_env( - packages: t.Mapping[str, t.Optional[Version_]], - package_pinning: _TPinningStr, - validate_installed_version: t.Callable[[str, t.Optional[Version_]], bool], -) -> t.Tuple[t.List[str], t.List[str]]: - missing: t.List[str] = [] - conflicts: t.List[str] = [] - - for name, version in packages.items(): - spec = f"{name}{package_pinning}{version}" if version else name - try: - if not validate_installed_version(name, version): - # Not installed! 
- missing.append(spec) - except VersionConflictError: - # Incompatible version found - conflicts.append(spec) - - return missing, conflicts - - def _format_incompatible_python_env_message( missing: t.Collection[str], conflicting: t.Collection[str] ) -> str: @@ -349,20 +208,24 @@ def _format_incompatible_python_env_message( missing_str = fmt_list("Missing", missing) conflict_str = fmt_list("Conflicting", conflicting) sep = "\n" if missing_str and conflict_str else "" - return ( - "Python Env Status Warning!\n" - "Requested Packages are Missing or Conflicting:\n\n" - f"{missing_str}{sep}{conflict_str}\n\n" - "Consider installing packages at the requested versions via `pip` or " - "uninstalling them, installing SmartSim with optional ML dependencies " - "(`pip install smartsim[ml]`), and running `smart clean && smart build ...`" - ) + + return textwrap.dedent(f"""\ + Python Package Warning: + + Requested packages are missing or have a version mismatch with + their respective backend: + + {missing_str}{sep}{conflict_str} + + Consider uninstalling any conflicting packages and rerunning + `smart build` if you encounter issues. 
+ """) def _configure_keydb_build(versions: Versioner) -> None: """Configure the redis versions to be used during the build operation""" versions.REDIS = Version_("6.2.0") - versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB" + versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB.git" versions.REDIS_BRANCH = "v6.2.0" CONFIG.conf_path = Path(CONFIG.core_path, "config", "keydb.conf") @@ -376,14 +239,33 @@ def _configure_keydb_build(versions: Versioner) -> None: def execute( args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / ) -> int: + + # Unpack various arguments verbose = args.v keydb = args.keydb - device = Device(args.device.lower()) + device = Device.from_str(args.device.lower()) is_dragon_requested = args.dragon - # torch and tf build by default - pt = not args.no_pt # pylint: disable=invalid-name - tf = not args.no_tf # pylint: disable=invalid-name - onnx = args.onnx + + if Path(CONFIG.build_path).exists(): + logger.warning(f"Build path already exists, removing: {CONFIG.build_path}") + shutil.rmtree(CONFIG.build_path) + + # The user should never have to specify the OS and Architecture + current_platform = Platform( + OperatingSystem.autodetect(), Architecture.autodetect(), device + ) + + # Configure the ML Packages + configs = load_platform_configs(Path(args.config_dir)) + mlpackages = configs[current_platform] + + # Build all backends by default, pop off the ones that user wants skipped + if args.skip_torch and "libtorch" in mlpackages: + mlpackages.pop("libtorch") + if args.skip_tensorflow and "libtensorflow" in mlpackages: + mlpackages.pop("libtensorflow") + if args.skip_onnx and "onnxruntime" in mlpackages: + mlpackages.pop("onnxruntime") build_env = BuildEnv(checks=True) logger.info("Running SmartSim build process...") @@ -409,6 +291,9 @@ def execute( version_names = list(vers.keys()) print(tabulate(vers, headers=version_names, tablefmt="github"), "\n") + logger.info("ML Packages") + print(mlpackages) + if 
is_dragon_requested: install_to = CONFIG.core_path / ".dragon" return_code = install_dragon(install_to) @@ -420,42 +305,25 @@ def execute( else: logger.warning("Dragon installation failed") - try: - if not args.only_python_packages: - # REDIS/KeyDB - build_database(build_env, versions, keydb, verbose) - - # REDISAI - build_redis_ai( - build_env, - versions, - device, - pt, - tf, - onnx, - args.torch_dir, - args.libtensorflow_dir, - verbose=verbose, - torch_with_mkl=args.torch_with_mkl, - ) - except (SetupError, BuildError) as e: - logger.error(str(e)) - return os.EX_SOFTWARE + # REDIS/KeyDB + build_database(build_env, versions, keydb, verbose) + + if (CONFIG.lib_path / "redisai.so").exists(): + logger.warning("RedisAI was previously built, run 'smart clean' to rebuild") + elif not args.skip_backends: + build_redis_ai(current_platform, mlpackages, build_env, verbose) + else: + logger.info("Skipping compilation of RedisAI and backends") backends = installed_redisai_backends() backends_str = ", ".join(s.capitalize() for s in backends) if backends else "No" - logger.info(f"{backends_str} backend(s) built") - - try: - if "torch" in backends: - check_py_torch_version(versions, device) - if "tensorflow" in backends: - check_py_tf_version(versions) - if "onnxruntime" in backends: - check_py_onnx_version(versions) - except (SetupError, BuildError) as e: - logger.error(str(e)) - return os.EX_SOFTWARE + logger.info(f"{backends_str} backend(s) available") + + if not args.skip_python_packages: + for package in mlpackages.values(): + logger.info(f"Installing python packages for {package.name}") + package.pip_install(quiet=not verbose) + check_ml_python_packages(mlpackages) logger.info("SmartSim build complete!") return os.EX_OK @@ -463,7 +331,14 @@ def execute( def configure_parser(parser: argparse.ArgumentParser) -> None: """Builds the parser for the command""" - warn_usage = "(ONLY USE IF NEEDED)" + + available_devices = [] + for platform in DEFAULT_MLPACKAGES: + if 
(platform.operating_system == OperatingSystem.autodetect()) and ( + platform.architecture == Architecture.autodetect() + ): + available_devices.append(platform.device.value) + parser.add_argument( "-v", action="store_true", @@ -474,7 +349,7 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: "--device", type=str.lower, default=Device.CPU.value, - choices=[device.value for device in Device], + choices=available_devices, help="Device to build ML runtimes for", ) parser.add_argument( @@ -484,40 +359,35 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: help="Install the dragon runtime", ) parser.add_argument( - "--only_python_packages", + "--skip-python-packages", action="store_true", - default=False, - help="Only evaluate the python packages (i.e. skip building backends)", + help="Do not install the python packages that match the backends", ) parser.add_argument( - "--no_pt", + "--skip-backends", action="store_true", - default=False, - help="Do not build PyTorch backend", + help="Do not compile RedisAI and the backends", ) parser.add_argument( - "--no_tf", + "--skip-torch", action="store_true", - default=False, - help="Do not build TensorFlow backend", + help="Do not build PyTorch backend", ) parser.add_argument( - "--onnx", + "--skip-tensorflow", action="store_true", - default=False, - help="Build ONNX backend (off by default)", + help="Do not build TensorFlow backend", ) parser.add_argument( - "--torch_dir", - default=None, - type=str, - help=f"Path to custom /torch/share/cmake/Torch/ directory {warn_usage}", + "--skip-onnx", + action="store_true", + help="Do not build the ONNX backend", ) parser.add_argument( - "--libtensorflow_dir", - default=None, + "--config-dir", + default=str(DEFAULT_MLPACKAGE_PATH), type=str, - help=f"Path to custom libtensorflow directory {warn_usage}", + help="Path to directory with JSON files describing platform and packages", ) parser.add_argument( "--keydb", @@ -525,9 +395,3 @@ def configure_parser(parser: 
argparse.ArgumentParser) -> None: default=False, help="Build KeyDB instead of Redis", ) - parser.add_argument( - "--no_torch_with_mkl", - dest="torch_with_mkl", - action="store_false", - help="Do not build Torch with Intel MKL", - ) diff --git a/smartsim/_core/_cli/scripts/dragon_install.py b/smartsim/_core/_cli/scripts/dragon_install.py index 466c390bd..8028b8ecf 100644 --- a/smartsim/_core/_cli/scripts/dragon_install.py +++ b/smartsim/_core/_cli/scripts/dragon_install.py @@ -7,7 +7,7 @@ from github.GitReleaseAsset import GitReleaseAsset from smartsim._core._cli.utils import pip -from smartsim._core._install.builder import WebTGZ +from smartsim._core._install.utils import retrieve from smartsim._core.config import CONFIG from smartsim._core.utils.helpers import check_platform, is_crayex_platform from smartsim.error.errors import SmartSimCLIActionCancelled @@ -159,8 +159,7 @@ def retrieve_asset(working_dir: pathlib.Path, asset: GitReleaseAsset) -> pathlib if working_dir.exists() and list(working_dir.rglob("*.whl")): return working_dir - archive = WebTGZ(asset.browser_download_url) - archive.extract(working_dir) + retrieve(asset.browser_download_url, working_dir) logger.debug(f"Retrieved {asset.browser_download_url} to {working_dir}") return working_dir @@ -182,7 +181,7 @@ def install_package(asset_dir: pathlib.Path) -> int: logger.info(f"Installing package: {wheel_path.absolute()}") try: - pip("install", "--force-reinstall", str(wheel_path)) + pip("install", "--force-reinstall", str(wheel_path), "numpy<2") wheel_path = next(wheels, None) except Exception: logger.error(f"Unable to install from {asset_dir}") diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py index 96d46d6ee..b7905b773 100644 --- a/smartsim/_core/_cli/validate.py +++ b/smartsim/_core/_cli/validate.py @@ -27,7 +27,6 @@ import argparse import contextlib import io -import multiprocessing as mp import os import os.path import tempfile @@ -39,7 +38,7 @@ from smartsim import 
Experiment from smartsim._core._cli.utils import SMART_LOGGER_FORMAT -from smartsim._core._install.builder import Device +from smartsim._core.types import Device from smartsim._core.utils.helpers import installed_redisai_backends from smartsim._core.utils.network import find_free_port from smartsim.log import get_logger @@ -207,25 +206,8 @@ def _make_managed_local_orc( def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None: - recv_conn, send_conn = mp.Pipe(duplex=False) - # Build the model in a subproc so that keras does not hog the gpu - proc = mp.Process(target=_build_tf_frozen_model, args=(send_conn, tmp_dir)) - proc.start() - - # do not need the sending connection in this proc anymore - send_conn.close() - - proc.join(timeout=120) - if proc.is_alive(): - proc.terminate() - raise Exception("Failed to build a simple keras model within 2 minutes") - try: - model_path, inputs, outputs = recv_conn.recv() - except EOFError as e: - raise Exception( - "Failed to receive serialized model from subprocess. " - "Is the `tensorflow` python package installed?" 
- ) from e + + model_path, inputs, outputs = _build_tf_frozen_model(tmp_dir) client.set_model_from_file( "keras-fcn", @@ -240,8 +222,9 @@ def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None: client.get_tensor("keras-output") -def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None: - from tensorflow import keras +def _build_tf_frozen_model(tmp_dir: str) -> t.Tuple[str, t.List[str], t.List[str]]: + + from tensorflow import keras # pylint: disable=no-name-in-module from smartsim.ml.tf import freeze_model @@ -258,7 +241,7 @@ def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None: optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"] ) model_path, inputs, outputs = freeze_model(fcn, tmp_dir, "keras_model.pb") - conn.send((model_path, inputs, outputs)) + return model_path, inputs, outputs def _test_torch_install(client: Client, device: Device) -> None: @@ -283,10 +266,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: net.eval() forward_input = torch.rand(1, 1, 3, 3).to(device_) - traced = torch.jit.trace(net, forward_input) # type: ignore[no-untyped-call] + traced = torch.jit.trace( # type: ignore[no-untyped-call, unused-ignore] + net, forward_input + ) buffer = io.BytesIO() - torch.jit.save(traced, buffer) # type: ignore[no-untyped-call] + torch.jit.save(traced, buffer) # type: ignore[no-untyped-call, unused-ignore] model = buffer.getvalue() client.set_model("torch-nn", model, backend="TORCH", device=device.value.upper()) diff --git a/smartsim/_core/_install/buildenv.py b/smartsim/_core/_install/buildenv.py index e0cf5a522..bff421b12 100644 --- a/smartsim/_core/_install/buildenv.py +++ b/smartsim/_core/_install/buildenv.py @@ -35,25 +35,8 @@ from pathlib import Path from typing import Iterable -# NOTE: This will be imported by setup.py and hence no -# smartsim related items or non-standand library -# items should be imported here. 
+from packaging.version import InvalidVersion, Version, parse -# TODO: pkg_resources has been deprecated by PyPA. Currently we use it for its -# packaging implementation, as we cannot assume a user will have `packaging` -# prior to `pip install` time. We really only use pkg_resources for their -# vendored version of `packaging.version.Version` so we should probably try -# to remove -# https://setuptools.pypa.io/en/latest/pkg_resources.html - -# isort: off -import pkg_resources -from pkg_resources import packaging # type: ignore - -# isort: on - -Version = packaging.version.Version -InvalidVersion = packaging.version.InvalidVersion DbEngine = t.Literal["REDIS", "KEYDB"] @@ -72,30 +55,6 @@ class SetupError(Exception): """ -class VersionConflictError(SetupError): - """An error for when version numbers of some library/package/program/etc - do not match and build may not be able to continue - """ - - def __init__( - self, - name: str, - current_version: "Version_", - target_version: "Version_", - msg: t.Optional[str] = None, - ) -> None: - if msg is None: - msg = ( - f"Incompatible version for {name} detected: " - f"{name} {target_version} requested but {name} {current_version} " - "installed." 
- ) - super().__init__(msg) - self.name = name - self.current_version = current_version - self.target_version = target_version - - # so as to not conflict with pkg_resources.packaging.version.Version # pylint: disable-next=invalid-name class Version_(str): @@ -105,9 +64,7 @@ class Version_(str): @staticmethod def _convert_to_version( - vers: t.Union[ - str, Iterable[packaging.version.Version], packaging.version.Version - ], + vers: t.Union[str, Iterable[Version], Version], ) -> t.Any: if isinstance(vers, Version): return vers @@ -122,20 +79,20 @@ def _convert_to_version( def major(self) -> int: # Version(self).major doesn't work for all Python distributions # see https://github.com/lebedov/python-pdfbox/issues/28 - return int(pkg_resources.parse_version(self).base_version.split(".")[0]) + return int(parse(self).base_version.split(".", maxsplit=1)[0]) @property def minor(self) -> int: - return int(pkg_resources.parse_version(self).base_version.split(".")[1]) + return int(parse(self).base_version.split(".", maxsplit=2)[1]) @property def micro(self) -> int: - return int(pkg_resources.parse_version(self).base_version.split(".")[2]) + return int(parse(self).base_version.split(".", maxsplit=3)[2]) @property def patch(self) -> str: # return micro with string modifier i.e. 1.2.3+cpu -> 3+cpu - return str(pkg_resources.parse_version(self)).split(".")[2] + return str(parse(self)).split(".")[2] def __gt__(self, cmp: t.Any) -> bool: try: @@ -175,74 +132,6 @@ def get_env(var: str, default: str) -> str: return os.environ.get(var, default) -class RedisAIVersion(Version_): - """A subclass of Version_ that holds the dependency sets for RedisAI - - this class serves two purposes: - - 1. It is used to populate the [ml] ``extras_require`` of the setup.py. - This is because the RedisAI version will determine which ML based - dependencies are required. - - 2. Used to set the default values for PyTorch, TF, and ONNX - given the SMARTSIM_REDISAI env var set by the user. 
- - NOTE: Torch requires additional information depending on whether - CPU or GPU support is requested - """ - - defaults = { - "1.2.7": { - "tensorflow": "2.13.1", - "onnx": "1.14.1", - "skl2onnx": "1.16.0", - "onnxmltools": "1.12.0", - "scikit-learn": "1.3.2", - "torch": "2.0.1", - "torch_cpu_suffix": "+cpu", - "torch_cuda_suffix": "+cu117", - "torchvision": "0.15.2", - }, - } - - def __init__(self, vers: str) -> None: # pylint: disable=super-init-not-called - min_rai_version = min(Version_(ver) for ver in self.defaults) - if min_rai_version > vers: - raise SetupError( - f"RedisAI version must be greater than or equal to {min_rai_version}" - ) - if vers not in self.defaults: - if vers.startswith("1.2"): - # resolve to latest version for 1.2.x - # the str representation will still be 1.2.x - self.version = "1.2.7" - else: - raise SetupError( - ( - f"Invalid RedisAI version {vers}. Options are " - f"{self.defaults.keys()}" - ) - ) - else: - self.version = vers - - def __getattr__(self, name: str) -> str: - try: - return self.defaults[self.version][name] - except KeyError: - raise AttributeError( - f"'{type(self).__name__}' object has no attribute '{name}'\n\n" - "This is likely a problem with the SmartSim build process;" - "if this problem persists please log a new issue at " - "https://github.com/CrayLabs/SmartSim/issues " - "or get in contact with us at " - "https://www.craylabs.org/docs/community.html" - ) from None - - def get_defaults(self) -> t.Dict[str, str]: - return self.defaults[self.version].copy() - - class Versioner: """Versioner is responsible for managing all the versions within SmartSim including SmartSim itself. @@ -261,77 +150,36 @@ class Versioner: ``smart build`` command to determine which dependency versions to look for and download. - Default versions for SmartSim, SmartRedis, Redis, and RedisAI are - all set here. Setting a default version for RedisAI also dictates - default versions of the machine learning libraries. 
+ Default versions for SmartSim, Redis, and RedisAI are specified here. """ # compatible Python version PYTHON_MIN = Version_("3.9.0") # Versions - SMARTSIM = Version_(get_env("SMARTSIM_VERSION", "0.7.0")) - SMARTREDIS = Version_(get_env("SMARTREDIS_VERSION", "0.5.3")) + SMARTSIM = Version_(get_env("SMARTSIM_VERSION", "0.8.0")) SMARTSIM_SUFFIX = get_env("SMARTSIM_SUFFIX", "") # Redis REDIS = Version_(get_env("SMARTSIM_REDIS", "7.2.4")) - REDIS_URL = get_env("SMARTSIM_REDIS_URL", "https://github.com/redis/redis.git/") + REDIS_URL = get_env("SMARTSIM_REDIS_URL", "https://github.com/redis/redis.git") REDIS_BRANCH = get_env("SMARTSIM_REDIS_BRANCH", REDIS) # RedisAI - REDISAI = RedisAIVersion(get_env("SMARTSIM_REDISAI", "1.2.7")) + REDISAI = "1.2.7" REDISAI_URL = get_env( - "SMARTSIM_REDISAI_URL", "https://github.com/RedisAI/RedisAI.git/" + "SMARTSIM_REDISAI_URL", "https://github.com/RedisAI/RedisAI.git" ) REDISAI_BRANCH = get_env("SMARTSIM_REDISAI_BRANCH", f"v{REDISAI}") - # ML/DL (based on RedisAI version defaults) - # torch can be set by the user because we download that for them - TORCH = Version_(get_env("SMARTSIM_TORCH", REDISAI.torch)) - TORCHVISION = Version_(get_env("SMARTSIM_TORCHVIS", REDISAI.torchvision)) - TORCH_CPU_SUFFIX = Version_(get_env("TORCH_CPU_SUFFIX", REDISAI.torch_cpu_suffix)) - TORCH_CUDA_SUFFIX = Version_( - get_env("TORCH_CUDA_SUFFIX", REDISAI.torch_cuda_suffix) - ) - - # TensorFlow and ONNX only use the defaults, but these are not built into - # the RedisAI package and therefore the user is free to pick other versions. 
- TENSORFLOW = Version_(REDISAI.tensorflow) - ONNX = Version_(REDISAI.onnx) - def as_dict(self, db_name: DbEngine = "REDIS") -> t.Dict[str, t.Tuple[str, ...]]: pkg_map = { "SMARTSIM": self.SMARTSIM, - "SMARTREDIS": self.SMARTREDIS, db_name: self.REDIS, "REDISAI": self.REDISAI, - "TORCH": self.TORCH, - "TENSORFLOW": self.TENSORFLOW, - "ONNX": self.ONNX, } return {"Packages": tuple(pkg_map), "Versions": tuple(pkg_map.values())} - def ml_extras_required(self) -> t.Dict[str, t.List[str]]: - """Optional ML/DL dependencies we suggest for the user. - - The defaults are based on the RedisAI version - """ - ml_defaults = self.REDISAI.get_defaults() - - # remove torch-related fields as they are subject to change - # by having the user change hardware (cpu/gpu) - _torch_fields = [ - "torch", - "torchvision", - "torch_cpu_suffix", - "torch_cuda_suffix", - ] - for field in _torch_fields: - ml_defaults.pop(field) - - return {"ml": [f"{lib}=={vers}" for lib, vers in ml_defaults.items()]} - @staticmethod def get_sha(setup_py_dir: Path) -> str: """Get the git sha of the current branch""" @@ -406,7 +254,7 @@ def __init__(self, checks: bool = True) -> None: self.check_dependencies() def check_dependencies(self) -> None: - deps = ["git", "git-lfs", "make", "wget", "cmake", self.CC, self.CXX] + deps = ["git", "make", "wget", "cmake", self.CC, self.CXX] if int(self.CHECKS) == 0: for dep in deps: self.check_build_dependency(dep) @@ -519,23 +367,6 @@ def check_build_dependency(command: str) -> None: except OSError: raise SetupError(f"{command} must be installed to build SmartSim") from None - @classmethod - def check_installed( - cls, package: str, version: t.Optional[Version_] = None - ) -> bool: - """Check if a package is installed. If version is provided, check if - it's a compatible version. 
(major and minor the same) - """ - try: - installed = cls.get_py_package_version(package) - except importlib.metadata.PackageNotFoundError: - return False - if version: - # detect if major or minor versions differ - if installed.major != version.major or installed.minor != version.minor: - raise VersionConflictError(package, installed, version) - return True - @staticmethod def get_py_package_version(package: str) -> Version_: return Version_(importlib.metadata.version(package)) diff --git a/smartsim/_core/_install/builder.py b/smartsim/_core/_install/builder.py index fb8ec5b81..17036e825 100644 --- a/smartsim/_core/_install/builder.py +++ b/smartsim/_core/_install/builder.py @@ -26,98 +26,32 @@ # pylint: disable=too-many-lines -import concurrent.futures -import enum -import fileinput -import itertools import os -import platform import re import shutil import stat import subprocess -import sys -import tarfile -import tempfile import typing as t -import urllib.request -import zipfile -from abc import ABC, abstractmethod -from dataclasses import dataclass from pathlib import Path -from shutil import which from subprocess import SubprocessError -# NOTE: This will be imported by setup.py and hence no smartsim related -# items should be imported into this file. 
+from smartsim._core._install.utils import retrieve +from smartsim._core.utils import expand_exe_path + +if t.TYPE_CHECKING: + from typing_extensions import Never # TODO: check cmake version and use system if possible to avoid conflicts -TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"] _PathLike = t.Union[str, "os.PathLike[str]"] _T = t.TypeVar("_T") _U = t.TypeVar("_U") -def expand_exe_path(exe: str) -> str: - """Takes an executable and returns the full path to that executable - - :param exe: executable or file - :raises TypeError: if file is not an executable - :raises FileNotFoundError: if executable cannot be found - """ - - # which returns none if not found - in_path = which(exe) - if not in_path: - if os.path.isfile(exe) and os.access(exe, os.X_OK): - return os.path.abspath(exe) - if os.path.isfile(exe) and not os.access(exe, os.X_OK): - raise TypeError(f"File, {exe}, is not an executable") - raise FileNotFoundError(f"Could not locate executable {exe}") - return os.path.abspath(in_path) - - class BuildError(Exception): pass -class Architecture(enum.Enum): - X64 = ("x86_64", "amd64") - ARM64 = ("arm64",) - - @classmethod - def from_str(cls, string: str, /) -> "Architecture": - string = string.lower() - for type_ in cls: - if string in type_.value: - return type_ - raise BuildError(f"Unrecognized or unsupported architecture: {string}") - - -class Device(enum.Enum): - CPU = "cpu" - GPU = "gpu" - - -class OperatingSystem(enum.Enum): - LINUX = ("linux", "linux2") - DARWIN = ("darwin",) - - @classmethod - def from_str(cls, string: str, /) -> "OperatingSystem": - string = string.lower() - for type_ in cls: - if string in type_.value: - return type_ - raise BuildError(f"Unrecognized or unsupported operating system: {string}") - - -class Platform(t.NamedTuple): - os: OperatingSystem - architecture: Architecture - - class Builder: """Base class for building third-party libraries""" @@ -135,13 +69,10 @@ def __init__( self, env: t.Dict[str, 
str], jobs: int = 1, - _os: OperatingSystem = OperatingSystem.from_str(platform.system()), - architecture: Architecture = Architecture.from_str(platform.machine()), verbose: bool = False, ) -> None: # build environment from buildenv self.env = env - self._platform = Platform(_os, architecture) # Find _core directory and set up paths _core_dir = Path(os.path.abspath(__file__)).parent.parent @@ -176,11 +107,6 @@ def out(self) -> t.Optional[int]: def is_built(self) -> bool: raise NotImplementedError - def build_from_git( - self, git_url: str, branch: str, device: Device = Device.CPU - ) -> None: - raise NotImplementedError - @staticmethod def binary_path(binary: str) -> str: binary_ = shutil.which(binary) @@ -256,15 +182,11 @@ def __init__( build_env: t.Optional[t.Dict[str, str]] = None, malloc: str = "libc", jobs: int = 1, - _os: OperatingSystem = OperatingSystem.from_str(platform.system()), - architecture: Architecture = Architecture.from_str(platform.machine()), verbose: bool = False, ) -> None: super().__init__( build_env or {}, jobs=jobs, - _os=_os, - architecture=architecture, verbose=verbose, ) self.malloc = malloc @@ -277,9 +199,7 @@ def is_built(self) -> bool: keydb_files = {"keydb-server", "keydb-cli"} return redis_files.issubset(bin_files) or keydb_files.issubset(bin_files) - def build_from_git( - self, git_url: str, branch: str, device: Device = Device.CPU - ) -> None: + def build_from_git(self, git_url: str, branch: str) -> None: """Build Redis from git :param git_url: url from which to retrieve Redis :param branch: branch to checkout @@ -301,23 +221,7 @@ def build_from_git( if not self.is_valid_url(git_url): raise BuildError(f"Malformed {database_name} URL: {git_url}") - clone_cmd = config_git_command( - self._platform, - [ - self.binary_path("git"), - "clone", - git_url, - "--branch", - branch, - "--depth", - "1", - database_name, - ], - ) - - # clone Redis - self.run_command(clone_cmd, cwd=self.build_dir) - + retrieve(git_url, self.build_dir / 
database_name, branch=branch, depth=1) # build Redis build_cmd = [ self.binary_path("make"), @@ -354,723 +258,3 @@ def build_from_git( _ = expand_exe_path(str(redis_cli)) except (TypeError, FileNotFoundError) as e: raise BuildError("Installation of redis-cli failed!") from e - - -class _RAIBuildDependency(ABC): - """An interface with a collection of magic methods so that - ``RedisAIBuilder`` can fetch and place its own dependencies - """ - - @property - @abstractmethod - def __rai_dependency_name__(self) -> str: ... - - @abstractmethod - def __place_for_rai__(self, target: _PathLike) -> Path: ... - - @staticmethod - @abstractmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: ... - - -def _place_rai_dep_at( - target: _PathLike, verbose: bool -) -> t.Callable[[_RAIBuildDependency], Path]: - def _place(dep: _RAIBuildDependency) -> Path: - if verbose: - print(f"Placing: '{dep.__rai_dependency_name__}'") - path = dep.__place_for_rai__(target) - if verbose: - print(f"Placed: '{dep.__rai_dependency_name__}' at '{path}'") - return path - - return _place - - -class RedisAIBuilder(Builder): - """Class to build RedisAI from Source - Supported build method: - - from git - See buildenv.py for buildtime configuration of RedisAI - version and url. 
- """ - - def __init__( - self, - _os: OperatingSystem = OperatingSystem.from_str(platform.system()), - architecture: Architecture = Architecture.from_str(platform.machine()), - build_env: t.Optional[t.Dict[str, str]] = None, - torch_dir: str = "", - libtf_dir: str = "", - build_torch: bool = True, - build_tf: bool = True, - build_onnx: bool = False, - jobs: int = 1, - verbose: bool = False, - torch_with_mkl: bool = True, - ) -> None: - super().__init__( - build_env or {}, - jobs=jobs, - _os=_os, - architecture=architecture, - verbose=verbose, - ) - - self.rai_install_path: t.Optional[Path] = None - - # convert to int for RAI build script - self._torch = build_torch - self._tf = build_tf - self._onnx = build_onnx - self.libtf_dir = libtf_dir - self.torch_dir = torch_dir - - # extra configuration options - self.torch_with_mkl = torch_with_mkl - - # Sanity checks - self._validate_platform() - - def _validate_platform(self) -> None: - unsupported = [] - if self._platform not in _DLPackRepository.supported_platforms(): - unsupported.append("DLPack") - if self.fetch_tf and (self._platform not in _TFArchive.supported_platforms()): - unsupported.append("Tensorflow") - if self.fetch_onnx and ( - self._platform not in _ORTArchive.supported_platforms() - ): - unsupported.append("ONNX") - if self.fetch_torch and ( - self._platform not in _PTArchive.supported_platforms() - ): - unsupported.append("PyTorch") - if unsupported: - raise BuildError( - f"The {', '.join(unsupported)} backend(s) are not supported " - f"on {self._platform.os} with {self._platform.architecture}" - ) - - @property - def rai_build_path(self) -> Path: - return Path(self.build_dir, "RedisAI") - - @property - def is_built(self) -> bool: - server = self.lib_path.joinpath("backends").is_dir() - cli = self.lib_path.joinpath("redisai.so").is_file() - return server and cli - - @property - def build_torch(self) -> bool: - return self._torch - - @property - def fetch_torch(self) -> bool: - return self.build_torch 
and not self.torch_dir - - @property - def build_tf(self) -> bool: - return self._tf - - @property - def fetch_tf(self) -> bool: - return self.build_tf and not self.libtf_dir - - @property - def build_onnx(self) -> bool: - return self._onnx - - @property - def fetch_onnx(self) -> bool: - return self.build_onnx - - def get_deps_dir_path_for(self, device: Device) -> Path: - def fail_to_format(reason: str) -> BuildError: # pragma: no cover - return BuildError(f"Failed to format RedisAI dependency path: {reason}") - - _os, architecture = self._platform - if _os == OperatingSystem.DARWIN: - os_ = "macos" - elif _os == OperatingSystem.LINUX: - os_ = "linux" - else: # pragma: no cover - raise fail_to_format(f"Unknown operating system: {_os}") - if architecture == Architecture.X64: - arch = "x64" - elif architecture == Architecture.ARM64: - arch = "arm64v8" - else: # pragma: no cover - raise fail_to_format(f"Unknown architecture: {architecture}") - return self.rai_build_path / f"deps/{os_}-{arch}-{device.value}" - - def _get_deps_to_fetch_for( - self, device: Device - ) -> t.Tuple[_RAIBuildDependency, ...]: - os_, arch = self._platform - # TODO: It would be nice if the backend version numbers were declared - # alongside the python package version numbers so that all of the - # dependency versions were declared in single location. - # Unfortunately importing into this module is non-trivial as it - # is used as script in the SmartSim `setup.py`. 
- - # DLPack is always required - fetchable_deps: t.List[_RAIBuildDependency] = [_DLPackRepository("v0.5_RAI")] - if self.fetch_torch: - pt_dep = _choose_pt_variant(os_)(arch, device, "2.0.1", self.torch_with_mkl) - fetchable_deps.append(pt_dep) - if self.fetch_tf: - fetchable_deps.append(_TFArchive(os_, arch, device, "2.13.1")) - if self.fetch_onnx: - fetchable_deps.append(_ORTArchive(os_, device, "1.16.3")) - - return tuple(fetchable_deps) - - def symlink_libtf(self, device: Device) -> None: - """Add symbolic link to available libtensorflow in RedisAI deps. - - :param device: cpu or gpu - """ - rai_deps_path = sorted( - self.rai_build_path.glob(os.path.join("deps", f"*{device.value}*")) - ) - if not rai_deps_path: - raise FileNotFoundError("Could not find RedisAI 'deps' directory") - - # There should only be one path for a given device, - # and this should hold even if in the future we use - # an external build of RedisAI - rai_libtf_path = rai_deps_path[0] / "libtensorflow" - rai_libtf_path.resolve() - if rai_libtf_path.is_dir(): - shutil.rmtree(rai_libtf_path) - - os.makedirs(rai_libtf_path) - libtf_path = Path(self.libtf_dir).resolve() - - # Copy include directory to deps/libtensorflow - include_src_path = libtf_path / "include" - if not include_src_path.exists(): - raise FileNotFoundError(f"Could not find include directory in {libtf_path}") - os.symlink(include_src_path, rai_libtf_path / "include") - - # RedisAI expects to find a lib directory, which is only - # available in some distributions. 
- rai_libtf_lib_dir = rai_libtf_path / "lib" - os.makedirs(rai_libtf_lib_dir) - src_libtf_lib_dir = libtf_path / "lib" - # If the lib directory existed in the libtensorflow distribution, - # copy its content, otherwise gather library files from - # libtensorflow base dir and copy them into destination lib dir - if src_libtf_lib_dir.is_dir(): - library_files = sorted(src_libtf_lib_dir.glob("*")) - if not library_files: - raise FileNotFoundError( - f"Could not find libtensorflow library files in {src_libtf_lib_dir}" - ) - else: - library_files = sorted(libtf_path.glob("lib*.so*")) - if not library_files: - raise FileNotFoundError( - f"Could not find libtensorflow library files in {libtf_path}" - ) - - for src_file in library_files: - dst_file = rai_libtf_lib_dir / src_file.name - if not dst_file.is_file(): - os.symlink(src_file, dst_file) - - def build_from_git( - self, git_url: str, branch: str, device: Device = Device.CPU - ) -> None: - """Build RedisAI from git - - :param git_url: url from which to retrieve RedisAI - :param branch: branch to checkout - :param device: cpu or gpu - """ - # delete previous build dir (should never be there) - if self.rai_build_path.is_dir(): - shutil.rmtree(self.rai_build_path) - - # Check RedisAI URL - if not self.is_valid_url(git_url): - raise BuildError(f"Malformed RedisAI URL: {git_url}") - - # clone RedisAI - clone_cmd = config_git_command( - self._platform, - [ - self.binary_path("env"), - "GIT_LFS_SKIP_SMUDGE=1", - "git", - "clone", - "--recursive", - git_url, - "--branch", - branch, - "--depth=1", - os.fspath(self.rai_build_path), - ], - ) - - self.run_command(clone_cmd, out=subprocess.DEVNULL, cwd=self.build_dir) - self._fetch_deps_for(device) - - if self.libtf_dir and device.value: - self.symlink_libtf(device) - - build_cmd = self._rai_build_env_prefix( - with_pt=self.build_torch, - with_tf=self.build_tf, - with_ort=self.build_onnx, - extra_env={"GPU": "1" if device == Device.GPU else "0"}, - ) - - if self.torch_dir: - 
self.env["Torch_DIR"] = str(self.torch_dir) - - build_cmd.extend( - [ - self.binary_path("make"), - "-C", - str(self.rai_build_path / "opt"), - "-j", - f"{self.jobs}", - "build", - ] - ) - self.run_command(build_cmd, cwd=self.rai_build_path) - - self._install_backends(device) - if self.user_supplied_backend("torch"): - self._move_torch_libs() - self.cleanup() - - def user_supplied_backend(self, backend: TRedisAIBackendStr) -> bool: - if backend == "torch": - return bool(self.build_torch and not self.fetch_torch) - if backend == "tensorflow": - return bool(self.build_tf and not self.fetch_tf) - if backend == "onnxruntime": - return bool(self.build_onnx and not self.fetch_onnx) - if backend == "tflite": - return False - raise BuildError(f"Unrecognized backend requested {backend}") - - def _rai_build_env_prefix( - self, - with_tf: bool, - with_pt: bool, - with_ort: bool, - extra_env: t.Optional[t.Dict[str, str]] = None, - ) -> t.List[str]: - extra_env = extra_env or {} - return [ - self.binary_path("env"), - f"WITH_PT={1 if with_pt else 0}", - f"WITH_TF={1 if with_tf else 0}", - "WITH_TFLITE=0", # never use TF Lite (for now) - f"WITH_ORT={1 if with_ort else 0}", - *(f"{key}={val}" for key, val in extra_env.items()), - ] - - def _fetch_deps_for(self, device: Device) -> None: - if not self.rai_build_path.is_dir(): - raise BuildError("RedisAI build directory not found") - - deps_dir = self.get_deps_dir_path_for(device) - deps_dir.mkdir(parents=True, exist_ok=True) - if any(deps_dir.iterdir()): - raise BuildError("RAI build dependency directory is not empty") - to_fetch = self._get_deps_to_fetch_for(device) - placed_paths = _threaded_map( - _place_rai_dep_at(deps_dir, self.verbose), to_fetch - ) - unique_placed_paths = {os.fspath(path.resolve()) for path in placed_paths} - if len(unique_placed_paths) != len(to_fetch): - raise BuildError( - f"Expected to place {len(to_fetch)} dependencies, but only " - f"found {len(unique_placed_paths)}" - ) - - def _install_backends(self, 
device: Device) -> None: - """Move backend libraries to smartsim/_core/lib/ - :param device: cpu or cpu - """ - self.rai_install_path = self.rai_build_path.joinpath( - f"install-{device.value}" - ).resolve() - rai_lib = self.rai_install_path / "redisai.so" - rai_backends = self.rai_install_path / "backends" - - if rai_lib.is_file() and rai_backends.is_dir(): - self.copy_dir(rai_backends, self.lib_path / "backends", set_exe=True) - self.copy_file(rai_lib, self.lib_path / "redisai.so", set_exe=True) - - def _move_torch_libs(self) -> None: - """Move pip install torch libraries - Since we use pip installed torch libraries for building - RedisAI, we need to move them into the LD_runpath of redisai.so - in the smartsim/_core/lib directory. - """ - ss_rai_torch_path = self.lib_path / "backends" / "redisai_torch" - ss_rai_torch_lib_path = ss_rai_torch_path / "lib" - - # retrieve torch shared libraries and copy to the - # smartsim/_core/lib/backends/redisai_torch/lib dir - # self.torch_dir should be /path/to/torch/share/cmake/Torch - # so we take the great grandparent here - pip_torch_path = Path(self.torch_dir).parent.parent.parent - pip_torch_lib_path = pip_torch_path / "lib" - - self.copy_dir(pip_torch_lib_path, ss_rai_torch_lib_path, set_exe=True) - - # also move the openmp files if on a mac - if sys.platform == "darwin": - dylibs = pip_torch_path / ".dylibs" - self.copy_dir(dylibs, ss_rai_torch_path / ".dylibs", set_exe=True) - - -def _threaded_map(fn: t.Callable[[_T], _U], items: t.Iterable[_T]) -> t.Sequence[_U]: - items = tuple(items) - if not items: # No items so no work to do - return () - num_workers = min(len(items), (os.cpu_count() or 4) * 5) - with concurrent.futures.ThreadPoolExecutor(num_workers) as pool: - return tuple(pool.map(fn, items)) - - -class _WebLocation(ABC): - @property - @abstractmethod - def url(self) -> str: ... 
- - -class _WebGitRepository(_WebLocation): - def clone( - self, - target: _PathLike, - depth: t.Optional[int] = None, - branch: t.Optional[str] = None, - ) -> None: - depth_ = ("--depth", str(depth)) if depth is not None else () - branch_ = ("--branch", branch) if branch is not None else () - _git("clone", "-q", *depth_, *branch_, self.url, os.fspath(target)) - - -@t.final -@dataclass(frozen=True) -class _DLPackRepository(_WebGitRepository, _RAIBuildDependency): - version: str - - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: - return ( - (OperatingSystem.LINUX, Architecture.X64), - (OperatingSystem.DARWIN, Architecture.X64), - (OperatingSystem.DARWIN, Architecture.ARM64), - ) - - @property - def url(self) -> str: - return "https://github.com/RedisAI/dlpack.git" - - @property - def __rai_dependency_name__(self) -> str: - return f"dlpack@{self.url}" - - def __place_for_rai__(self, target: _PathLike) -> Path: - target = Path(target) / "dlpack" - self.clone(target, branch=self.version, depth=1) - if not target.is_dir(): - raise BuildError("Failed to place dlpack") - return target - - -class _WebArchive(_WebLocation): - @property - def name(self) -> str: - _, name = self.url.rsplit("/", 1) - return name - - def download(self, target: _PathLike) -> Path: - target = Path(target) - if target.is_dir(): - target = target / self.name - file, _ = urllib.request.urlretrieve(self.url, target) - return Path(file).resolve() - - -class _ExtractableWebArchive(_WebArchive, ABC): - @abstractmethod - def _extract_download(self, download_path: Path, target: _PathLike) -> None: ... 
- - def extract(self, target: _PathLike) -> None: - with tempfile.TemporaryDirectory() as tmp_dir: - arch_path = self.download(tmp_dir) - self._extract_download(arch_path, target) - - -class _WebTGZ(_ExtractableWebArchive): - def _extract_download(self, download_path: Path, target: _PathLike) -> None: - with tarfile.open(download_path, "r") as tgz_file: - tgz_file.extractall(target) - - -class _WebZip(_ExtractableWebArchive): - def _extract_download(self, download_path: Path, target: _PathLike) -> None: - with zipfile.ZipFile(download_path, "r") as zip_file: - zip_file.extractall(target) - - -class WebTGZ(_WebTGZ): - def __init__(self, url: str) -> None: - self._url = url - - @property - def url(self) -> str: - return self._url - - -@dataclass(frozen=True) -class _PTArchive(_WebZip, _RAIBuildDependency): - architecture: Architecture - device: Device - version: str - with_mkl: bool - - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: - # TODO: This will need to be revisited if the inheritance tree gets deeper - return tuple( - itertools.chain.from_iterable( - var.supported_platforms() for var in _PTArchive.__subclasses__() - ) - ) - - @property - def __rai_dependency_name__(self) -> str: - return f"libtorch@{self.url}" - - @staticmethod - def _patch_out_mkl(libtorch_root: Path) -> None: - _modify_source_files( - libtorch_root / "share/cmake/Caffe2/public/mkl.cmake", - r"find_package\(MKL QUIET\)", - "# find_package(MKL QUIET)", - ) - - def extract(self, target: _PathLike) -> None: - super().extract(target) - if not self.with_mkl: - self._patch_out_mkl(Path(target)) - - def __place_for_rai__(self, target: _PathLike) -> Path: - self.extract(target) - target = Path(target) / "libtorch" - if not target.is_dir(): - raise BuildError("Failed to place RAI dependency: `libtorch`") - return target - - -@t.final -class _PTArchiveLinux(_PTArchive): - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, 
Architecture]]: - return ((OperatingSystem.LINUX, Architecture.X64),) - - @property - def url(self) -> str: - if self.device == Device.GPU: - pt_build = "cu117" - else: - pt_build = Device.CPU.value - # pylint: disable-next=line-too-long - libtorch_archive = ( - f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip" - ) - return f"https://download.pytorch.org/libtorch/{pt_build}/{libtorch_archive}" - - -@t.final -class _PTArchiveMacOSX(_PTArchive): - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: - return ( - (OperatingSystem.DARWIN, Architecture.ARM64), - (OperatingSystem.DARWIN, Architecture.X64), - ) - - @property - def url(self) -> str: - if self.device == Device.GPU: - raise BuildError("RedisAI does not currently support GPU on Mac OSX") - if self.architecture == Architecture.X64: - pt_build = Device.CPU.value - libtorch_archive = f"libtorch-macos-{self.version}.zip" - root_url = "https://download.pytorch.org/libtorch" - return f"{root_url}/{pt_build}/{libtorch_archive}" - if self.architecture == Architecture.ARM64: - libtorch_archive = f"libtorch-macos-arm64-{self.version}.zip" - # pylint: disable-next=line-too-long - root_url = ( - "https://github.com/CrayLabs/ml_lib_builder/releases/download/v0.1/" - ) - return f"{root_url}/{libtorch_archive}" - - raise BuildError(f"Unsupported architecture for Pytorch: {self.architecture}") - - -def _choose_pt_variant( - os_: OperatingSystem, -) -> t.Union[t.Type[_PTArchiveLinux], t.Type[_PTArchiveMacOSX]]: - if os_ == OperatingSystem.DARWIN: - return _PTArchiveMacOSX - if os_ == OperatingSystem.LINUX: - return _PTArchiveLinux - - raise BuildError(f"Unsupported OS for PyTorch: {os_}") - - -@t.final -@dataclass(frozen=True) -class _TFArchive(_WebTGZ, _RAIBuildDependency): - os_: OperatingSystem - architecture: Architecture - device: Device - version: str - - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: - 
return ( - (OperatingSystem.LINUX, Architecture.X64), - (OperatingSystem.DARWIN, Architecture.X64), - ) - - @property - def url(self) -> str: - if self.architecture == Architecture.X64: - tf_arch = "x86_64" - else: - raise BuildError( - f"Unexpected Architecture for TF Archive: {self.architecture}" - ) - - if self.os_ == OperatingSystem.LINUX: - tf_os = "linux" - tf_device = self.device - elif self.os_ == OperatingSystem.DARWIN: - tf_os = "darwin" - if self.device == Device.GPU: - raise BuildError("RedisAI does not currently support GPU on Macos") - tf_device = Device.CPU - else: - raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") - return ( - "https://storage.googleapis.com/tensorflow/libtensorflow/" - f"libtensorflow-{tf_device.value}-{tf_os}-{tf_arch}-{self.version}.tar.gz" - ) - - @property - def __rai_dependency_name__(self) -> str: - return f"libtensorflow@{self.url}" - - def __place_for_rai__(self, target: _PathLike) -> Path: - target = Path(target) / "libtensorflow" - target.mkdir() - self.extract(target) - return target - - -@t.final -@dataclass(frozen=True) -class _ORTArchive(_WebTGZ, _RAIBuildDependency): - os_: OperatingSystem - device: Device - version: str - - @staticmethod - def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: - return ( - (OperatingSystem.LINUX, Architecture.X64), - (OperatingSystem.DARWIN, Architecture.X64), - ) - - @property - def url(self) -> str: - ort_url_base = ( - "https://github.com/microsoft/onnxruntime/releases/" - f"download/v{self.version}" - ) - if self.os_ == OperatingSystem.LINUX: - ort_os = "linux" - ort_arch = "x64" - ort_build = "-gpu" if self.device == Device.GPU else "" - elif self.os_ == OperatingSystem.DARWIN: - ort_os = "osx" - ort_arch = "x86_64" - ort_build = "" - if self.device == Device.GPU: - raise BuildError("RedisAI does not currently support GPU on Macos") - else: - raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") - ort_archive = 
f"onnxruntime-{ort_os}-{ort_arch}{ort_build}-{self.version}.tgz" - return f"{ort_url_base}/{ort_archive}" - - @property - def __rai_dependency_name__(self) -> str: - return f"onnxruntime@{self.url}" - - def __place_for_rai__(self, target: _PathLike) -> Path: - target = Path(target).resolve() / "onnxruntime" - self.extract(target) - try: - (extracted_dir,) = target.iterdir() - except ValueError: - raise BuildError( - "Unexpected number of files extracted from ORT archive" - ) from None - for file in extracted_dir.iterdir(): - file.rename(target / file.name) - extracted_dir.rmdir() - return target - - -def _git(*args: str) -> None: - git = Builder.binary_path("git") - cmd = (git,) + args - with subprocess.Popen(cmd) as proc: - proc.wait() - if proc.returncode != 0: - raise BuildError( - f"Command `{' '.join(cmd)}` failed with exit code {proc.returncode}" - ) - - -def config_git_command(plat: Platform, cmd: t.Sequence[str]) -> t.List[str]: - """Modify git commands to include autocrlf when on a platform that needs - autocrlf enabled to behave correctly - """ - cmd = list(cmd) - where = next((i for i, tok in enumerate(cmd) if tok.endswith("git")), len(cmd)) + 2 - if where >= len(cmd): - raise ValueError(f"Failed to locate git command in '{' '.join(cmd)}'") - if plat == Platform(OperatingSystem.DARWIN, Architecture.ARM64): - cmd = ( - cmd[:where] - + ["--config", "core.autocrlf=false", "--config", "core.eol=lf"] - + cmd[where:] - ) - return cmd - - -def _modify_source_files( - files: t.Union[_PathLike, t.Iterable[_PathLike]], regex: str, replacement: str -) -> None: - compiled_regex = re.compile(regex) - with fileinput.input(files=files, inplace=True) as handles: - for line in handles: - line = compiled_regex.sub(replacement, line) - print(line, end="") diff --git a/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json b/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json new file mode 100644 index 000000000..2f49a393e --- /dev/null +++ 
b/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json @@ -0,0 +1,47 @@ +{ + "platform": { + "operating_system":"darwin", + "architecture":"arm64", + "device":"cpu" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.4.0", + "pip_index": "", + "python_packages": [ + "torch==2.4.0", + "torchvision==0.19.0", + "torchaudio==2.4.0" + ], + "lib_source": "https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.4.0.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + } + ] + }, + { + "name": "onnxruntime", + "version": "1.17.3", + "pip_index": "", + "python_packages": [ + "onnx==1.15", + "skl2onnx", + "scikit-learn", + "onnxmltools" + ], + "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-osx-arm64-1.17.3.tgz" + } + ] +} diff --git a/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json b/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json new file mode 100644 index 000000000..e7b67e35b --- /dev/null +++ b/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json @@ -0,0 +1,56 @@ +{ + "platform": { + "operating_system":"darwin", + "architecture":"x86_64", + "device":"cpu" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.2.2", + "pip_index": "", + "python_packages": [ + "torch==2.2.2", + "torchvision==0.17.2", + "torchaudio==2.2.2" + ], + "lib_source": 
"https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-2.2.2.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + } + ] + }, + { + "name": "libtensorflow", + "version": "2.15", + "pip_index": "", + "python_packages": [ + "tensorflow==2.15" + ], + "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.15.0.tar.gz" + }, + { + "name": "onnxruntime", + "version": "1.17.3", + "pip_index": "", + "python_packages": [ + "onnx==1.15", + "skl2onnx", + "scikit-learn", + "onnxmltools" + ], + "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-osx-x86_64-1.17.3.tgz" + } + ] +} diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json new file mode 100644 index 000000000..cc2f81194 --- /dev/null +++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json @@ -0,0 +1,56 @@ +{ + "platform": { + "operating_system":"linux", + "architecture":"x86_64", + "device":"cpu" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.4.0", + "pip_index": "https://download.pytorch.org/whl/cpu", + "python_packages": [ + "torch==2.4.0+cpu", + "torchvision==0.19.0+cpu", + "torchaudio==2.4.0+cpu" + ], + "lib_source": "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcpu.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + 
"regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + } + ] + }, + { + "name": "libtensorflow", + "version": "2.15", + "pip_index": "", + "python_packages": [ + "tensorflow==2.15" + ], + "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.15.0.tar.gz" + }, + { + "name": "onnxruntime", + "version": "1.17.3", + "pip_index": "", + "python_packages": [ + "onnx<=1.15", + "skl2onnx", + "scikit-learn", + "onnxmltools" + ], + "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-1.17.3.tgz" + } + ] +} diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json new file mode 100644 index 000000000..cf302534c --- /dev/null +++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json @@ -0,0 +1,56 @@ +{ + "platform": { + "operating_system":"linux", + "architecture":"x86_64", + "device":"cuda-11" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.3.1", + "pip_index": "https://download.pytorch.org/whl/cu118", + "python_packages": [ + "torch==2.3.1+cu118", + "torchvision==0.18.1+cu118", + "torchaudio==2.3.1+cu118" + ], + "lib_source": "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + } + ] + }, + { + "name": "libtensorflow", + "version": "2.14.1", 
+ "pip_index": "", + "python_packages": [ + "tensorflow==2.14.1" + ], + "lib_source": "https://github.com/CrayLabs/ml_lib_builder/releases/download/v0.2/libtensorflow-2.14.1-linux-x64-cuda-11.8.0.tgz" + }, + { + "name": "onnxruntime", + "version": "1.17.3", + "pip_index": "", + "python_packages": [ + "onnx==1.15", + "skl2onnx", + "scikit-learn", + "onnxmltools" + ], + "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz" + } + ] +} diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json new file mode 100644 index 000000000..a415b3103 --- /dev/null +++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json @@ -0,0 +1,64 @@ +{ + "platform": { + "operating_system":"linux", + "architecture":"x86_64", + "device":"cuda-12" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.3.1", + "pip_index": "https://download.pytorch.org/whl/cu121", + "python_packages": [ + "torch==2.3.1+cu121", + "torchvision==0.18.1+cu121", + "torchaudio==2.3.1+cu121" + ], + "lib_source": "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + } + ] + }, + { + "name": "libtensorflow", + "version": "2.15", + "pip_index": "", + "python_packages": [ + "tensorflow==2.15" + ], + "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.15.0.tar.gz", + "rai_patches": [ + 
{ + "description": "Patch RedisAI to point to correct tsl directory", + "source_file": "CMakeLists.txt", + "regex": "INCLUDE_DIRECTORIES\\(\\$\\{depsAbs\\}/libtensorflow/include\\)", + "replacement": "INCLUDE_DIRECTORIES(${depsAbs}/libtensorflow/include ${depsAbs}/libtensorflow/include/external/local_tsl)" + } + ] + }, + { + "name": "onnxruntime", + "version": "1.17.3", + "pip_index": "", + "python_packages": [ + "onnx==1.15", + "skl2onnx", + "scikit-learn", + "onnxmltools" + ], + "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-cuda12-1.17.3.tgz" + } + ] +} diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json new file mode 100644 index 000000000..b4673e901 --- /dev/null +++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json @@ -0,0 +1,47 @@ +{ + "platform": { + "operating_system":"linux", + "architecture":"x86_64", + "device":"rocm-6" + }, + "ml_packages": [ + { + "name": "dlpack", + "version": "v0.5_RAI", + "pip_index": "", + "python_packages": [], + "lib_source": "https://github.com/RedisAI/dlpack.git" + }, + { + "name": "libtorch", + "version": "2.4.0", + "pip_index": "https://download.pytorch.org/whl/rocm6.1", + "python_packages": [ + "torch==2.4.0+rocm6.1", + "torchvision==0.19.0+rocm6.1", + "torchaudio==2.4.0+rocm6.1" + ], + "lib_source": "https://download.pytorch.org/libtorch/rocm6.1/libtorch-cxx11-abi-shared-with-deps-2.4.1%2Brocm6.1.zip", + "rai_patches": [ + { + "description": "Patch RedisAI module to require C++17 standard instead of C++14", + "source_file": "src/backends/libtorch_c/CMakeLists.txt", + "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)", + "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)" + }, + { + "description": "Fix Regex, Load HIP", + "source_file": "../package/libtorch/share/cmake/Caffe2/public/LoadHIP.cmake", + "regex": 
".*string.*", + "replacement": "" + }, + { + "description": "Replace `/opt/rocm` with `$ENV{ROCM_PATH}`", + "source_file": "../package/libtorch/share/cmake/Caffe2/Caffe2Targets.cmake", + "regex": "/opt/rocm", + "replacement": "$ENV{ROCM_PATH}" + } + ] + } + ] +} diff --git a/smartsim/_core/_install/mlpackages.py b/smartsim/_core/_install/mlpackages.py new file mode 100644 index 000000000..04e3798d3 --- /dev/null +++ b/smartsim/_core/_install/mlpackages.py @@ -0,0 +1,198 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import json
import os
import pathlib
import re
import subprocess
import sys
import typing as t
from collections.abc import MutableMapping
from dataclasses import dataclass

from tabulate import tabulate

from .platform import Platform
from .types import PathLike
from .utils import retrieve


class RequireRelativePath(Exception):
    """Exception type declared for relative-path requirements

    Nothing in this module raises it.
    """


@dataclass
class RAIPatch:
    """Holds information about how to patch a RedisAI source file

    :param description: Human-readable description of the patch's purpose
    :param replacement: The replacement for the text matched by the regex
    :param source_file: A relative path to the chosen file
    :param regex: A regex pattern to match in the given file
    """

    description: str
    replacement: str
    source_file: pathlib.Path
    regex: re.Pattern[str]

    def __post_init__(self) -> None:
        # Callers (e.g. the JSON loader below) hand in plain strings, so
        # normalize both fields to the types declared on the dataclass.
        self.source_file = pathlib.Path(self.source_file)
        self.regex = re.compile(self.regex)


@dataclass
class MLPackage:
    """Describes the python and C/C++ library for an ML package

    :param name: Name of the package, used as the key in a collection
    :param version: Version string of the package
    :param pip_index: Index URL handed to pip, ignored when empty
    :param python_packages: Requirement strings installed with pip
    :param lib_source: URL or path handed to ``retrieve``
    :param rai_patches: Patches associated with this package
    """

    name: str
    version: str
    pip_index: str
    python_packages: t.List[str]
    lib_source: PathLike
    rai_patches: t.Tuple[RAIPatch, ...] = ()

    def retrieve(self, destination: PathLike) -> None:
        """Retrieve an archive and/or repository for the package

        :param destination: Path to place the extracted package or repository
        """
        retrieve(self.lib_source, pathlib.Path(destination))

    def pip_install(self, quiet: bool = False) -> None:
        """Install associated python packages

        Does nothing when the package lists no python packages.

        :param quiet: If True, suppress most of the pip output, defaults to False
        :raises subprocess.CalledProcessError: if pip exits with a non-zero code
        """
        if not self.python_packages:
            return

        install_command = [sys.executable, "-m", "pip", "install"]
        if self.pip_index:
            install_command += ["--index-url", self.pip_index]
        if quiet:
            install_command += ["--quiet", "--no-warn-conflicts"]
        install_command += self.python_packages
        subprocess.check_call(install_command)


class MLPackageCollection(MutableMapping[str, MLPackage]):
    """Collects multiple MLPackages

    Define a collection of MLPackages available for a specific platform.
    Entries can be read and deleted, but not assigned.
    """

    def __init__(self, platform: Platform, ml_packages: t.Sequence[MLPackage]):
        self.platform = platform
        # Keyed by package name; a later duplicate name replaces an earlier one
        self._ml_packages = {pkg.name: pkg for pkg in ml_packages}

    @classmethod
    def from_json_file(cls, json_file: PathLike) -> "MLPackageCollection":
        """Create an MLPackageCollection specified from a JSON file

        The file must contain a ``platform`` mapping (keyword arguments for
        ``Platform.from_strs``) and an ``ml_packages`` list (keyword arguments
        for ``MLPackage``).

        :param json_file: path to the JSON file
        :return: An instance of MLPackageCollection for a platform
        """
        with open(json_file, "r", encoding="utf-8") as file_handle:
            config_json = json.load(file_handle)
        platform = Platform.from_strs(**config_json["platform"])

        ml_packages = []
        for package_spec in config_json["ml_packages"]:
            # Work on a copy so the parsed JSON is left untouched
            package_kwargs = dict(package_spec)
            if "rai_patches" in package_kwargs:
                # Convert the dictionary representations to RAIPatch objects,
                # stored as a tuple to match the type declared on MLPackage
                package_kwargs["rai_patches"] = tuple(
                    RAIPatch(**patch) for patch in package_kwargs["rai_patches"]
                )
            ml_packages.append(MLPackage(**package_kwargs))
        return cls(platform, ml_packages)

    def __iter__(self) -> t.Iterator[str]:
        """Iterate over the mlpackages in the collection

        :return: Iterator over the names of the mlpackages
        """
        return iter(self._ml_packages)

    def __getitem__(self, key: str) -> MLPackage:
        """Retrieve an MLPackage based on its name

        :param key: Name of the package (e.g. libtorch)
        :return: MLPackage with all requirements
        """
        return self._ml_packages[key]

    def __len__(self) -> int:
        return len(self._ml_packages)

    def __delitem__(self, key: str) -> None:
        del self._ml_packages[key]

    def __setitem__(self, key: t.Any, value: t.Any) -> t.NoReturn:
        raise TypeError(f"{type(self).__name__} does not support item assignment")

    def __contains__(self, key: object) -> bool:
        return key in self._ml_packages

    def __str__(self, tablefmt: str = "github") -> str:
        """Display package names and versions as a table

        :param tablefmt: Tabulate format, defaults to "github"
        :return: Table with one row per package
        """
        rows = [[name, pkg.version] for name, pkg in self._ml_packages.items()]
        return tabulate(rows, headers=["Package", "Version"], tablefmt=tablefmt)


def load_platform_configs(
    config_file_path: pathlib.Path,
) -> t.Dict[Platform, MLPackageCollection]:
    """Create MLPackageCollections from JSON files in directory

    :param config_file_path: Directory with JSON files describing the
        configuration by platform
    :return: Dictionary whose keys are the supported platform and values
        are its associated MLPackageCollection
    :raises FileNotFoundError: if ``config_file_path`` is not a directory
    """
    if not config_file_path.is_dir():
        path = os.fspath(config_file_path)
        msg = f"Platform configuration directory `{path}` does not exist"
        raise FileNotFoundError(msg)

    configs: t.Dict[Platform, MLPackageCollection] = {}
    # Sorted so that the result (and which file wins if two files describe the
    # same platform) does not depend on filesystem listing order
    for config_file in sorted(config_file_path.glob("*.json")):
        dependencies = MLPackageCollection.from_json_file(config_file)
        configs[dependencies.platform] = dependencies
    return configs


DEFAULT_MLPACKAGE_PATH: t.Final = (
    pathlib.Path(__file__).parent / "configs" / "mlpackages"
)
# Loaded at import time from the JSON files under DEFAULT_MLPACKAGE_PATH
DEFAULT_MLPACKAGES: t.Final = load_platform_configs(DEFAULT_MLPACKAGE_PATH)
b/smartsim/_core/_install/platform.py new file mode 100644 index 000000000..bef13c6a0 --- /dev/null +++ b/smartsim/_core/_install/platform.py @@ -0,0 +1,226 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import enum
import json
import os
import pathlib
import platform
import typing as t
from dataclasses import dataclass

from typing_extensions import Self


class PlatformError(Exception):
    """Base class for the exceptions declared in this module"""


class UnsupportedError(PlatformError):
    """PlatformError subclass declared alongside the platform enums"""


class Architecture(enum.Enum):
    """Identifiers for supported CPU architectures"""

    X64 = "x86_64"
    ARM64 = "arm64"

    @classmethod
    def from_str(cls, string: str) -> "Architecture":
        """Look up the enum member for an architecture name

        The lookup is case-insensitive.

        :param string: String representing the architecture, see platform.machine
        :return: Enum for a specific architecture
        :raises ValueError: if the name matches no member
        """
        return cls(string.lower())

    @classmethod
    def autodetect(cls) -> "Architecture":
        """Look up the architecture reported by ``platform.machine``

        :return: enum of this platform's architecture
        """
        machine = platform.machine()
        return cls.from_str(machine)


class Device(enum.Enum):
    """Identifiers for the device stack"""

    CPU = "cpu"
    CUDA11 = "cuda-11"
    CUDA12 = "cuda-12"
    ROCM5 = "rocm-5"
    ROCM6 = "rocm-6"

    @classmethod
    def from_str(cls, str_: str) -> "Device":
        """Look up the enum member for a device name

        The lookup is case-insensitive and the generic name ``gpu`` maps to
        ``CUDA11``.

        :param str_: String representing the device and version
        :return: Enum for a specific device
        :raises ValueError: if the name matches no member
        """
        normalized = str_.lower()
        # TODO: auto detect which device to use
        # currently hard coded to `cuda11`
        return cls.CUDA11 if normalized == "gpu" else cls(normalized)

    @classmethod
    def detect_cuda_version(cls) -> t.Optional["Device"]:
        """Find the enum based on environment CUDA

        Reads ``version.json`` in the directory named by ``CUDA_HOME``.

        :return: Enum for the major CUDA version found, or None when
            ``CUDA_HOME`` is unset or empty
        """
        cuda_home = os.environ.get("CUDA_HOME")
        if not cuda_home:
            return None

        version_file = pathlib.Path(cuda_home) / "version.json"
        with open(version_file, "r", encoding="utf-8") as file_handle:
            cuda_versions = json.load(file_handle)
        # Only the leading component of the version string is used
        major = cuda_versions["cuda"]["version"].partition(".")[0]
        return cls.from_str(f"cuda-{major}")

    @classmethod
    def detect_rocm_version(cls) -> t.Optional["Device"]:
        """Find the enum based on environment ROCm

        Reads the first line of ``.info/version`` in the directory named by
        ``ROCM_HOME``.

        :return: Enum for the major ROCm version found, or None when
            ``ROCM_HOME`` is unset or empty
        """
        rocm_home = os.environ.get("ROCM_HOME")
        if not rocm_home:
            return None

        version_file = pathlib.Path(rocm_home) / ".info" / "version"
        with open(version_file, "r", encoding="utf-8") as file_handle:
            first_line = file_handle.readline()
        # Drop anything after the first "-", then keep the leading component
        major = first_line.partition("-")[0].partition(".")[0]
        return cls.from_str(f"rocm-{major}")

    def is_gpu(self) -> bool:
        """Whether the enum is categorized as a GPU

        :return: True for every member except ``CPU``
        """
        return self is not type(self).CPU

    def is_cuda(self) -> bool:
        """Whether the enum is associated with a CUDA device

        :return: True for any supported CUDA enums
        """
        return self in type(self).cuda_enums()

    def is_rocm(self) -> bool:
        """Whether the enum is associated with a ROCm device

        :return: True for any supported ROCm enums
        """
        return self in type(self).rocm_enums()

    @classmethod
    def cuda_enums(cls) -> t.Tuple["Device", ...]:
        """Collect every member whose value contains ``cuda``

        :return: all enums associated with CUDA
        """
        return tuple(member for member in cls if "cuda" in member.value)

    @classmethod
    def rocm_enums(cls) -> t.Tuple["Device", ...]:
        """Collect every member whose value contains ``rocm``

        :return: all enums associated with ROCm
        """
        return tuple(member for member in cls if "rocm" in member.value)


class OperatingSystem(enum.Enum):
    """Enum for all supported operating systems"""

    LINUX = "linux"
    DARWIN = "darwin"

    @classmethod
    def from_str(cls, string: str, /) -> "OperatingSystem":
        """Look up the enum member for an OS name

        The lookup is case-insensitive.

        :param string: String representing the OS
        :return: Enum for a specific OS
        :raises ValueError: if the name matches no member
        """
        return cls(string.lower())

    @classmethod
    def autodetect(cls) -> "OperatingSystem":
        """Look up the OS reported by ``platform.system``

        :return: enum of this platform's OS
        """
        system = platform.system()
        return cls.from_str(system)


@dataclass(frozen=True)
class Platform:
    """Container describing relevant identifiers for a platform"""

    operating_system: OperatingSystem
    architecture: Architecture
    device: Device

    @classmethod
    def from_strs(cls, operating_system: str, architecture: str, device: str) -> Self:
        """Factory method for Platform from string input

        :param operating_system: String identifier for the OS
        :param architecture: String identifier for the architecture
        :param device: String identifier for the device and version
        :return: Instance of Platform
        """
        detected_os = OperatingSystem.from_str(operating_system)
        detected_arch = Architecture.from_str(architecture)
        detected_device = Device.from_str(device)
        return cls(detected_os, detected_arch, detected_device)

    def __str__(self) -> str:
        """Human-readable representation of Platform

        :return: The member names of each property joined with ``-``
        """
        members = (self.operating_system, self.architecture, self.device)
        return "-".join(member.name for member in members)
import fileinput
import os
import pathlib
import shutil
import stat
import subprocess
import typing as t
from collections import deque

from smartsim._core._cli.utils import SMART_LOGGER_FORMAT
from smartsim._core._install.buildenv import BuildEnv
from smartsim._core._install.mlpackages import MLPackageCollection, RAIPatch
from smartsim._core._install.platform import OperatingSystem, Platform
from smartsim._core._install.utils import retrieve
from smartsim._core.config import CONFIG
from smartsim.log import get_logger

logger = get_logger("Smart", fmt=SMART_LOGGER_FORMAT)
_SUPPORTED_ROCM_ARCH = "gfx90a"


class RedisAIBuildError(Exception):
    """Raised when a command run as part of the RedisAI build fails"""


class RedisAIBuilder:
    """Class to build RedisAI from Source"""

    def __init__(
        self,
        platform: Platform,
        mlpackages: MLPackageCollection,
        build_env: BuildEnv,
        main_build_path: pathlib.Path,
        verbose: bool = False,
        source: t.Union[str, pathlib.Path] = "https://github.com/RedisAI/RedisAI.git",
        version: str = "v1.2.7",
    ) -> None:
        """Store the build configuration and clear any previous build

        :param platform: Platform the build targets
        :param mlpackages: ML packages to retrieve and build backends for
        :param build_env: Provides the C and C++ compilers handed to CMake
        :param main_build_path: Directory under which ``RedisAI`` is created
        :param verbose: If True, echo commands and do not capture their output
        :param source: URL or path from which RedisAI is retrieved
        :param version: Branch handed to the retrieval of RedisAI
        """
        self.platform = platform
        self.mlpackages = mlpackages
        self.build_env = build_env
        self.verbose = verbose
        self.source = source
        self.version = version
        self._root_path = main_build_path / "RedisAI"

        # Start from a clean slate: build() creates these directories afresh
        self.cleanup_build()

    @property
    def src_path(self) -> pathlib.Path:
        """Directory holding the RedisAI sources"""
        return pathlib.Path(self._root_path / "src")

    @property
    def build_path(self) -> pathlib.Path:
        """Directory in which CMake and make are run"""
        return pathlib.Path(self._root_path / "build")

    @property
    def package_path(self) -> pathlib.Path:
        """Directory into which the ML packages are retrieved"""
        return pathlib.Path(self._root_path / "package")

    def cleanup_build(self) -> None:
        """Removes all directories associated with the build"""
        for directory in (self.src_path, self.build_path, self.package_path):
            shutil.rmtree(directory, ignore_errors=True)

    @property
    def is_built(self) -> bool:
        """Determine whether RedisAI and backends were built

        :return: True if all backends and RedisAI module are in
                 the expected location
        """
        backend_dir = CONFIG.lib_path / "backends"
        # NOTE(review): every name in ``mlpackages`` is expected to have a
        # ``redisai_<name>`` directory -- confirm this holds for entries that
        # are not RedisAI backends themselves.
        rai_exists = [
            (backend_dir / f"redisai_{backend_name}").is_dir()
            for backend_name in self.mlpackages
        ]
        rai_exists.append((CONFIG.lib_path / "redisai.so").is_file())
        return all(rai_exists)

    @property
    def build_torch(self) -> bool:
        """Whether to build torch backend

        :return: True if torch backend should be built
        """
        return "libtorch" in self.mlpackages

    @property
    def build_tensorflow(self) -> bool:
        """Whether to build tensorflow backend

        :return: True if tensorflow backend should be built
        """
        return "libtensorflow" in self.mlpackages

    @property
    def build_onnxruntime(self) -> bool:
        """Whether to build onnx backend

        :return: True if onnx backend should be built
        """
        return "onnxruntime" in self.mlpackages

    def build(self) -> None:
        """Build RedisAI

        Retrieves the RedisAI source and the ML packages, applies the
        patches attached to each package, then configures with CMake and
        runs the build command.

        :raises FileExistsError: if one of the build directories already exists
        :raises RedisAIBuildError: if the CMake or build command fails
        """

        # Following is needed to make sure that the clone/checkout is not
        # impeded by git LFS limits imposed by RedisAI
        os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"

        self.src_path.mkdir(parents=True)
        self.build_path.mkdir(parents=True)
        self.package_path.mkdir(parents=True)

        retrieve(self.source, self.src_path, depth=1, branch=self.version)

        self._prepare_packages()

        for package in self.mlpackages.values():
            self._patch_source_files(package.rai_patches)
        cmake_command = self._rai_cmake_cmd()
        build_command = self._rai_build_cmd

        if self.platform.device.is_rocm() and "libtorch" in self.mlpackages:
            pytorch_rocm_arch = os.environ.get("PYTORCH_ROCM_ARCH")
            if not pytorch_rocm_arch:
                logger.info(
                    f"PYTORCH_ROCM_ARCH not set. Defaulting to '{_SUPPORTED_ROCM_ARCH}'"
                )
                os.environ["PYTORCH_ROCM_ARCH"] = _SUPPORTED_ROCM_ARCH
            elif pytorch_rocm_arch != _SUPPORTED_ROCM_ARCH:
                # This branch is only reached when libtorch is being built, so
                # the library the user may be supplying is libtorch
                logger.warning(
                    f"PYTORCH_ROCM_ARCH is not {_SUPPORTED_ROCM_ARCH} which is the "
                    "only officially supported architecture. This may still work "
                    "if you are supplying your own version of libtorch."
                )

        logger.info("Configuring CMake Build")
        if self.verbose:
            print(" ".join(cmake_command))
        self.run_command(cmake_command, self.build_path)

        logger.info("Building RedisAI")
        if self.verbose:
            print(" ".join(build_command))
        self.run_command(build_command, self.build_path)

        if self.platform.operating_system == OperatingSystem.LINUX:
            self._set_execute(CONFIG.lib_path / "redisai.so")

    @staticmethod
    def _set_execute(target: pathlib.Path) -> None:
        """Set execute permissions for file

        :param target: The target file to add execute permission
        """
        # Adds the owner-execute bit on top of the existing mode
        permissions = os.stat(target).st_mode | stat.S_IXUSR
        os.chmod(target, permissions)

    @staticmethod
    def _find_closest_object(
        start_path: pathlib.Path, target_obj: str
    ) -> t.Optional[pathlib.Path]:
        """Breadth-first search for the directory containing an entry

        :param start_path: Directory at which the search starts
        :param target_obj: Name of the file or directory to look for
        :return: The first directory found that contains ``target_obj``,
                 or None if no directory below ``start_path`` contains it
        """
        queue = deque([start_path])
        while queue:
            current_dir = queue.popleft()
            if (current_dir / target_obj).exists():
                return current_dir
            queue.extend(entry for entry in current_dir.iterdir() if entry.is_dir())
        return None

    def _prepare_packages(self) -> None:
        """Ensure that retrieved archives/packages are in the expected location

        RedisAI requires that the root directory of the backend is at
        DEP_PATH/example_backend. Due to difficulties in retrieval methods and
        naming conventions from different sources, this cannot be standardized.
        Instead we try to find the parent of the "include" directory and assume
        this is the root.
        """

        for package in self.mlpackages.values():
            logger.info(f"Retrieving package: {package.name} {package.version}")
            target_dir = self.package_path / package.name
            package.retrieve(target_dir)
            # Move actual contents to root of the expected location
            actual_root = self._find_closest_object(target_dir, "include")
            if actual_root and actual_root != target_dir:
                logger.debug(
                    f"Non-standard location found: \n{actual_root} -> {target_dir}"
                )
                # Snapshot the listing before renaming entries out of it
                for entry in list(actual_root.iterdir()):
                    entry.rename(target_dir / entry.name)

    def run_command(self, cmd: t.Union[str, t.List[str]], cwd: pathlib.Path) -> None:
        """Executor of commands used in the build

        Unless verbose, stdout is discarded and stderr is captured so that
        it can be printed if the command fails.

        :param cmd: The actual command to execute
        :param cwd: The working directory to execute in
        :raises RedisAIBuildError: if the command exits with a non-zero code
        """
        stdout = None if self.verbose else subprocess.DEVNULL
        stderr = None if self.verbose else subprocess.PIPE
        proc = subprocess.run(
            cmd, cwd=str(cwd), stdout=stdout, stderr=stderr, check=False
        )
        if proc.returncode != 0:
            # stderr is only captured (not None) in the non-verbose case
            if proc.stderr is not None:
                print(proc.stderr.decode("utf-8", errors="replace"))
            # A plain string must not be joined character by character
            command_text = cmd if isinstance(cmd, str) else " ".join(cmd)
            raise RedisAIBuildError(
                f"RedisAI build failed during command: {command_text}"
            )

    def _rai_cmake_cmd(self) -> t.List[str]:
        """Build the CMake configuration command

        :return: CMake command with correct options
        """

        def on_off(expression: bool) -> t.Literal["ON", "OFF"]:
            return "ON" if expression else "OFF"

        cmake_args: t.Dict[str, str] = {
            "BUILD_TF": on_off(self.build_tensorflow),
            "BUILD_ORT": on_off(self.build_onnxruntime),
            "BUILD_TORCH": on_off(self.build_torch),
            "BUILD_TFLITE": "OFF",
            "DEPS_PATH": str(self.package_path),
            "DEVICE": "gpu" if self.platform.device.is_gpu() else "cpu",
            "INSTALL_PATH": str(CONFIG.lib_path),
            "CMAKE_C_COMPILER": self.build_env.CC,
            "CMAKE_CXX_COMPILER": self.build_env.CXX,
        }
        if self.platform.device.is_rocm():
            cmake_args["Torch_DIR"] = str(self.package_path / "libtorch")
        cmd = ["cmake"]
        cmd += (f"-D{key}={value}" for key, value in cmake_args.items())
        cmd.append(str(self.src_path))
        return cmd

    @property
    def _rai_build_cmd(self) -> t.List[str]:
        """Shell command to build RedisAI and modules

        With the CMake based install, very little needs to be done here.
        "make install" is used to ensure that all resulting RedisAI backends
        and their dependencies end up in the same location with the correct
        RPATH if applicable.

        :return: Command used to compile RedisAI and backends
        """
        return ["make", "install", "-j", "VERBOSE=1"]

    def _patch_source_files(self, patches: t.Tuple[RAIPatch, ...]) -> None:
        """Apply specified RedisAI patches

        Each file is rewritten in place, line by line, with every match of
        the patch's regex replaced.

        :param patches: Patches whose ``source_file`` is resolved relative
                        to the RedisAI source directory
        """
        for patch in patches:
            with fileinput.input(
                str(self.src_path / patch.source_file), inplace=True
            ) as file_handle:
                for line in file_handle:
                    # With inplace=True, print() writes into the patched file
                    print(patch.regex.sub(patch.replacement, line), end="")
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pathlib +import typing as t + +PathLike = t.Union[str, pathlib.Path] diff --git a/smartsim/_core/_install/utils/__init__.py b/smartsim/_core/_install/utils/__init__.py new file mode 100644 index 000000000..4e47cf282 --- /dev/null +++ b/smartsim/_core/_install/utils/__init__.py @@ -0,0 +1,27 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from .retrieve import retrieve diff --git a/smartsim/_core/_install/utils/retrieve.py b/smartsim/_core/_install/utils/retrieve.py new file mode 100644 index 000000000..fcac565d4 --- /dev/null +++ b/smartsim/_core/_install/utils/retrieve.py @@ -0,0 +1,185 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
import os
import pathlib
import shutil
import tarfile
import typing as t
import zipfile
from urllib.parse import urlparse
from urllib.request import urlretrieve

import git
from tqdm import tqdm

from smartsim._core._install.platform import Architecture, OperatingSystem
from smartsim._core._install.types import PathLike


class UnsupportedArchive(Exception):
    """Raised when a source is not an archive or directory that can be handled"""


class _TqdmUpTo(tqdm):  # type: ignore[type-arg]
    """Provides `update_to(n)` which uses `tqdm.update(delta_n)`

    From tqdm documentation for progress bar when downloading
    """

    def update_to(
        self, num_blocks: int = 1, bsize: int = 1, tsize: t.Optional[int] = None
    ) -> t.Optional[bool]:
        """Update progress in tqdm-like way

        :param num_blocks: number of blocks transferred so far, defaults to 1
        :param bsize: size of each block (in tqdm units), defaults to 1
        :param tsize: total size (in tqdm units), defaults to None
        :return: Update
        """

        if tsize is not None:
            self.total = tsize
        # update() takes a delta, so subtract what has already been counted
        return self.update(num_blocks * bsize - self.n)


def _from_local_archive(
    source: PathLike,
    destination: pathlib.Path,
    **kwargs: t.Any,
) -> None:
    """Decompress a local archive

    :param source: Path to the archive on a local system
    :param destination: Where to unpack the archive
    :param kwargs: Extra options forwarded to the archive's ``extractall``
    :raises UnsupportedArchive: if the file is neither a tar nor a zip archive
    """
    # NOTE(review): extractall() is used as-is on archives that may have been
    # downloaded; consider an extraction filter where the supported Python
    # versions provide one.
    if tarfile.is_tarfile(source):
        with tarfile.open(source) as archive:
            archive.extractall(path=destination, **kwargs)
    elif zipfile.is_zipfile(source):
        with zipfile.ZipFile(source) as archive:
            archive.extractall(path=destination, **kwargs)
    else:
        # Previously fell through silently, leaving an empty destination
        raise UnsupportedArchive(
            f"Source ({source}) is not a supported archive or directory "
        )


def _from_local_directory(
    source: PathLike,
    destination: pathlib.Path,
    **kwargs: t.Any,
) -> None:
    """Copy the contents of a directory

    :param source: source directory
    :param destination: destination directory
    :param kwargs: Extra options forwarded to ``shutil.copytree``
    """
    shutil.copytree(source, destination, **kwargs)


def _from_http(
    source: str,
    destination: pathlib.Path,
    **kwargs: t.Any,
) -> None:
    """Download and decompress a package

    :param source: URL to a particular package
    :param destination: Where to unpack the archive
    :param kwargs: Extra options forwarded to ``urlretrieve``
    :raises UnsupportedArchive: if the download is neither a tar nor a zip archive
    """
    with _TqdmUpTo(
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
        miniters=1,
        desc=source.split("/")[-1],
    ) as _t:  # all optional kwargs
        local_file, _ = urlretrieve(source, reporthook=_t.update_to, **kwargs)
        _t.total = _t.n

    try:
        _from_local_archive(local_file, destination)
    finally:
        # Remove the downloaded file even when extraction fails
        os.remove(local_file)


def _from_git(source: str, destination: pathlib.Path, **clone_kwargs: t.Any) -> None:
    """Clone a repository

    :param source: Path to the remote (URL or local) repository
    :param destination: where to clone the repository
    :param clone_kwargs: various options to send to the clone command
    """
    is_mac = OperatingSystem.autodetect() == OperatingSystem.DARWIN
    is_arm64 = Architecture.autodetect() == Architecture.ARM64
    if is_mac and is_arm64:
        # Line-ending configuration is forced only on macOS/arm64
        config_options = ["--config core.autocrlf=false", "--config core.eol=lf"]
        allow_unsafe_options = True
    else:
        config_options = None
        allow_unsafe_options = False
    git.Repo.clone_from(
        source,
        destination,
        multi_options=config_options,
        allow_unsafe_options=allow_unsafe_options,
        **clone_kwargs,
    )


def retrieve(
    source: PathLike, destination: pathlib.Path, **retrieve_kwargs: t.Any
) -> None:
    """Primary method for retrieval

    Automatically choose the correct method based on the extension and/or source
    of the archive. If downloaded, this will also decompress the archive and
    extract

    :param source: URL or path to find the package
    :param destination: where to place the package
    :param retrieve_kwargs: Extra options forwarded to the chosen method
    :raises UnsupportedArchive: Unknown archive type
    :raises FileNotFoundError: Path to archive does not exist
    """
    parsed_url = urlparse(str(source))
    # A ".git" suffix takes precedence over the URL scheme
    if parsed_url.path.endswith(".git"):
        _from_git(str(source), destination, **retrieve_kwargs)
        return
    if parsed_url.scheme in ("http", "https"):
        _from_http(str(source), destination, **retrieve_kwargs)
        return

    # This is probably a path
    source_path = pathlib.Path(source)
    if not source_path.exists():
        raise FileNotFoundError(f"Package path or file does not exist: {source}")
    if source_path.is_dir():
        _from_local_directory(source, destination, **retrieve_kwargs)
    elif source_path.is_file() and source_path.suffix in (
        ".gz",
        ".zip",
        ".tgz",
    ):
        _from_local_archive(source, destination, **retrieve_kwargs)
    else:
        raise UnsupportedArchive(
            f"Source ({source}) is not a supported archive or directory "
        )
Need to modify the configuration + # of KeyDB vs Redis at build time + self.conf_dir = self.core_path / "config" + self.conf_path = self.conf_dir / "redis.conf" - dependency_path = os.environ.get("SMARTSIM_DEP_INSTALL_PATH", self.core_path) + @property + def dependency_path(self) -> Path: + return Path( + os.environ.get("SMARTSIM_DEP_INSTALL_PATH", str(self.core_path)) + ).resolve() + + @property + def lib_path(self) -> Path: + return Path(self.dependency_path, "lib") - self.lib_path = Path(dependency_path, "lib").resolve() - self.bin_path = Path(dependency_path, "bin").resolve() - self.conf_path = Path(dependency_path, "config", "redis.conf") - self.conf_dir = Path(self.core_path, "config") + @property + def bin_path(self) -> Path: + return Path(self.dependency_path, "bin") + + @property + def build_path(self) -> Path: + return Path(self.dependency_path, "build") @property def redisai(self) -> str: @@ -157,7 +172,7 @@ def database_file_parse_interval(self) -> int: @property def dragon_dotenv(self) -> Path: """Returns the path to a .env file containing dragon environment variables""" - return self.conf_dir / "dragon" / ".env" + return Path(self.conf_dir / "dragon" / ".env") @property def dragon_server_path(self) -> t.Optional[str]: diff --git a/smartsim/_core/control/controller.py b/smartsim/_core/control/controller.py index 43a218545..0b943ee90 100644 --- a/smartsim/_core/control/controller.py +++ b/smartsim/_core/control/controller.py @@ -72,6 +72,7 @@ LocalLauncher, LSFLauncher, PBSLauncher, + SGELauncher, SlurmLauncher, ) from ..launcher.launcher import Launcher @@ -343,6 +344,7 @@ def init_launcher(self, launcher: str) -> None: "lsf": LSFLauncher, "local": LocalLauncher, "dragon": DragonLauncher, + "sge": SGELauncher, } if launcher is not None: diff --git a/smartsim/_core/launcher/__init__.py b/smartsim/_core/launcher/__init__.py index d78909641..c6584ee3d 100644 --- a/smartsim/_core/launcher/__init__.py +++ b/smartsim/_core/launcher/__init__.py @@ -29,6 +29,7 
@@ from .local.local import LocalLauncher from .lsf.lsfLauncher import LSFLauncher from .pbs.pbsLauncher import PBSLauncher +from .sge.sgeLauncher import SGELauncher from .slurm.slurmLauncher import SlurmLauncher __all__ = [ @@ -37,5 +38,6 @@ "LocalLauncher", "LSFLauncher", "PBSLauncher", + "SGELauncher", "SlurmLauncher", ] diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py index 245660662..4aba60d55 100644 --- a/smartsim/_core/launcher/dragon/dragonBackend.py +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -210,10 +210,13 @@ def group_infos(self) -> dict[str, ProcessGroupInfo]: def _initialize_hosts(self) -> None: with self._queue_lock: - self._hosts: t.List[str] = sorted( - dragon_machine.Node(node).hostname - for node in dragon_machine.System().nodes - ) + self._nodes = [ + dragon_machine.Node(node) for node in dragon_machine.System().nodes + ] + self._hosts: t.List[str] = sorted(node.hostname for node in self._nodes) + self._cpus = [node.num_cpus for node in self._nodes] + self._gpus = [node.num_gpus for node in self._nodes] + """List of hosts available in allocation""" self._free_hosts: t.Deque[str] = collections.deque(self._hosts) """List of hosts on which steps can be launched""" @@ -285,6 +288,34 @@ def current_time(self) -> float: """Current time for DragonBackend object, in seconds since the Epoch""" return time.time() + def _can_honor_policy( + self, request: DragonRunRequest + ) -> t.Tuple[bool, t.Optional[str]]: + """Check if the policy can be honored with resources available + in the allocation. 
+ :param request: DragonRunRequest containing policy information + :returns: Tuple indicating if the policy can be honored and + an optional error message""" + # ensure the policy can be honored + if request.policy: + if request.policy.cpu_affinity: + # make sure some node has enough CPUs + available = max(self._cpus) + requested = max(request.policy.cpu_affinity) + + if requested >= available: + return False, "Cannot satisfy request, not enough CPUs available" + + if request.policy.gpu_affinity: + # make sure some node has enough GPUs + available = max(self._gpus) + requested = max(request.policy.gpu_affinity) + + if requested >= available: + return False, "Cannot satisfy request, not enough GPUs available" + + return True, None + def _can_honor(self, request: DragonRunRequest) -> t.Tuple[bool, t.Optional[str]]: """Check if request can be honored with resources available in the allocation. @@ -299,6 +330,11 @@ def _can_honor(self, request: DragonRunRequest) -> t.Tuple[bool, t.Optional[str] if self._shutdown_requested: message = "Cannot satisfy request, server is shutting down." 
return False, message + + honorable, err = self._can_honor_policy(request) + if not honorable: + return False, err + return True, None def _allocate_step( @@ -391,6 +427,50 @@ def _stop_steps(self) -> None: self._group_infos[step_id].status = SmartSimStatus.STATUS_CANCELLED self._group_infos[step_id].return_codes = [-9] + @staticmethod + def create_run_policy( + request: DragonRequest, node_name: str + ) -> "dragon_policy.Policy": + """Create a dragon Policy from the request and node name + :param request: DragonRunRequest containing policy information + :param node_name: Name of the node on which the process will run + :returns: dragon_policy.Policy object mapped from request properties""" + if isinstance(request, DragonRunRequest): + run_request: DragonRunRequest = request + + affinity = dragon_policy.Policy.Affinity.DEFAULT + cpu_affinity: t.List[int] = [] + gpu_affinity: t.List[int] = [] + + # Customize policy only if the client requested it, otherwise use default + if run_request.policy is not None: + # Affinities are not mutually exclusive. 
If specified, both are used + if run_request.policy.cpu_affinity: + affinity = dragon_policy.Policy.Affinity.SPECIFIC + cpu_affinity = run_request.policy.cpu_affinity + + if run_request.policy.gpu_affinity: + affinity = dragon_policy.Policy.Affinity.SPECIFIC + gpu_affinity = run_request.policy.gpu_affinity + logger.debug( + f"Affinity strategy: {affinity}, " + f"CPU affinity mask: {cpu_affinity}, " + f"GPU affinity mask: {gpu_affinity}" + ) + if affinity != dragon_policy.Policy.Affinity.DEFAULT: + return dragon_policy.Policy( + placement=dragon_policy.Policy.Placement.HOST_NAME, + host_name=node_name, + affinity=affinity, + cpu_affinity=cpu_affinity, + gpu_affinity=gpu_affinity, + ) + + return dragon_policy.Policy( + placement=dragon_policy.Policy.Placement.HOST_NAME, + host_name=node_name, + ) + def _start_steps(self) -> None: self._heartbeat() with self._queue_lock: @@ -412,10 +492,7 @@ def _start_steps(self) -> None: policies = [] for node_name in hosts: - local_policy = dragon_policy.Policy( - placement=dragon_policy.Policy.Placement.HOST_NAME, - host_name=node_name, - ) + local_policy = self.create_run_policy(request, node_name) policies.extend([local_policy] * request.tasks_per_node) tmp_proc = dragon_process.ProcessTemplate( target=request.exe, diff --git a/smartsim/_core/launcher/dragon/dragonLauncher.py b/smartsim/_core/launcher/dragon/dragonLauncher.py index 17b47e309..9078fed54 100644 --- a/smartsim/_core/launcher/dragon/dragonLauncher.py +++ b/smartsim/_core/launcher/dragon/dragonLauncher.py @@ -29,6 +29,8 @@ import os import typing as t +from smartsim._core.schemas.dragonRequests import DragonRunPolicy + from ...._core.launcher.stepMapping import StepMap from ....error import LauncherError, SmartSimError from ....log import get_logger @@ -168,6 +170,9 @@ def run(self, step: Step) -> t.Optional[str]: merged_env = self._connector.merge_persisted_env(os.environ.copy()) nodes = int(run_args.get("nodes", None) or 1) tasks_per_node = 
int(run_args.get("tasks-per-node", None) or 1) + + policy = DragonRunPolicy.from_run_args(run_args) + response = _assert_schema_type( self._connector.send_request( DragonRunRequest( @@ -181,6 +186,7 @@ def run(self, step: Step) -> t.Optional[str]: current_env=merged_env, output_file=out, error_file=err, + policy=policy, ) ), DragonRunResponse, diff --git a/smartsim/_core/launcher/lsf/lsfCommands.py b/smartsim/_core/launcher/lsf/lsfCommands.py index cb92587c1..0b98abf58 100644 --- a/smartsim/_core/launcher/lsf/lsfCommands.py +++ b/smartsim/_core/launcher/lsf/lsfCommands.py @@ -26,7 +26,7 @@ import typing as t -from ..util.shell import execute_cmd +from ...utils.shell import execute_cmd def bjobs(args: t.List[str]) -> t.Tuple[str, str]: diff --git a/smartsim/_core/launcher/pbs/pbsCommands.py b/smartsim/_core/launcher/pbs/pbsCommands.py index 989af93be..2a8fcf872 100644 --- a/smartsim/_core/launcher/pbs/pbsCommands.py +++ b/smartsim/_core/launcher/pbs/pbsCommands.py @@ -26,7 +26,7 @@ import typing as t -from ..util.shell import execute_cmd +from ...utils.shell import execute_cmd def qstat(args: t.List[str]) -> t.Tuple[str, str]: diff --git a/smartsim/_core/launcher/sge/__init__.py b/smartsim/_core/launcher/sge/__init__.py new file mode 100644 index 000000000..efe03908e --- /dev/null +++ b/smartsim/_core/launcher/sge/__init__.py @@ -0,0 +1,25 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/smartsim/_core/launcher/sge/sgeCommands.py b/smartsim/_core/launcher/sge/sgeCommands.py new file mode 100644 index 000000000..a284ee8db --- /dev/null +++ b/smartsim/_core/launcher/sge/sgeCommands.py @@ -0,0 +1,77 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t + +from ...utils.shell import execute_cmd + + +def qstat(args: t.List[str]) -> t.Tuple[str, str]: + """Calls SGE qstat with args + + :param args: List of command arguments + :returns: Output and error of qstat + """ + cmd = ["qstat"] + args + _, out, error = execute_cmd(cmd) + return out, error + + +def qsub(args: t.List[str]) -> t.Tuple[str, str]: + """Calls SGE qsub with args + + :param args: List of command arguments + :returns: Output and error of qsub + """ + cmd = ["qsub"] + args + _, out, error = execute_cmd(cmd) + return out, error + + +def qdel(args: t.List[str]) -> t.Tuple[int, str, str]: + """Calls SGE qdel with args. + + returncode is also supplied in this function. + + :param args: list of command arguments + :return: return code, output, and error + """ + cmd = ["qdel"] + args + returncode, out, error = execute_cmd(cmd) + return returncode, out, error + + +def qacct(args: t.List[str]) -> t.Tuple[int, str, str]: + """Calls SGE qacct with args. + + returncode is also supplied in this function. 
+ + :param args: list of command arguments + :return: return code, output, and error + """ + cmd = ["qacct"] + args + returncode, out, error = execute_cmd(cmd) + return returncode, out, error diff --git a/smartsim/_core/launcher/sge/sgeLauncher.py b/smartsim/_core/launcher/sge/sgeLauncher.py new file mode 100644 index 000000000..af600cf1d --- /dev/null +++ b/smartsim/_core/launcher/sge/sgeLauncher.py @@ -0,0 +1,184 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import time +import typing as t + +from ....error import LauncherError +from ....log import get_logger +from ....settings import ( + MpiexecSettings, + MpirunSettings, + OrterunSettings, + RunSettings, + SettingsBase, + SgeQsubBatchSettings, +) +from ....status import SmartSimStatus +from ...config import CONFIG +from ..launcher import WLMLauncher +from ..step import ( + LocalStep, + MpiexecStep, + MpirunStep, + OrterunStep, + SgeQsubBatchStep, + Step, +) +from ..stepInfo import SGEStepInfo, StepInfo +from .sgeCommands import qacct, qdel, qstat +from .sgeParser import parse_qacct_job_output, parse_qstat_jobid_xml + +logger = get_logger(__name__) + + +class SGELauncher(WLMLauncher): + """This class encapsulates the functionality needed + to launch jobs on systems that use SGE as a workload manager. + + All WLM launchers are capable of launching managed and unmanaged + jobs. Managed jobs are queried through interaction with the WLM, + in this case SGE. Unmanaged jobs are held in the TaskManager + and are managed through references to their launching process ID + i.e. 
a psutil.Popen object + """ + + # init in WLMLauncher, launcher.py + + @property + def supported_rs(self) -> t.Dict[t.Type[SettingsBase], t.Type[Step]]: + # RunSettings types supported by this launcher + return { + SgeQsubBatchSettings: SgeQsubBatchStep, + MpiexecSettings: MpiexecStep, + MpirunSettings: MpirunStep, + OrterunSettings: OrterunStep, + RunSettings: LocalStep, + } + + def run(self, step: Step) -> t.Optional[str]: + """Run a job step through SGE + + :param step: a job step instance + :raises LauncherError: if launch fails + :return: job step id if job is managed + """ + if not self.task_manager.actively_monitoring: + self.task_manager.start() + + cmd_list = step.get_launch_cmd() + step_id: t.Optional[str] = None + task_id: t.Optional[str] = None + if isinstance(step, SgeQsubBatchStep): + # wait for batch step to submit successfully + return_code, out, err = self.task_manager.start_and_wait(cmd_list, step.cwd) + if return_code != 0: + raise LauncherError(f"Qsub batch submission failed\n {out}\n {err}") + if out: + step_id = out.split(" ")[2] + logger.debug(f"Gleaned batch job id: {step_id} for {step.name}") + else: + # mpiexec/mpirun/orterun/local steps don't direct output for us. 
+ out, err = step.get_output_files() + + # pylint: disable-next=consider-using-with + output = open(out, "w+", encoding="utf-8") + # pylint: disable-next=consider-using-with + error = open(err, "w+", encoding="utf-8") + task_id = self.task_manager.start_task( + cmd_list, step.cwd, step.env, out=output.fileno(), err=error.fileno() + ) + + self.step_mapping.add(step.name, step_id, task_id, step.managed) + + return step_id + + def stop(self, step_name: str) -> StepInfo: + """Stop/cancel a job step + + :param step_name: name of the job to stop + :return: update for job due to cancel + """ + stepmap = self.step_mapping[step_name] + if stepmap.managed: + qdel_rc, _, err = qdel([str(stepmap.step_id)]) + if qdel_rc != 0: + logger.warning(f"Unable to cancel job step {step_name}\n {err}") + if stepmap.task_id: + self.task_manager.remove_task(str(stepmap.task_id)) + else: + self.task_manager.remove_task(str(stepmap.task_id)) + + _, step_info = self.get_step_update([step_name])[0] + if not step_info: + raise LauncherError(f"Could not get step_info for job step {step_name}") + + step_info.status = ( + SmartSimStatus.STATUS_CANCELLED + ) # set status to cancelled instead of failed + return step_info + + def _get_managed_step_update(self, step_ids: t.List[str]) -> t.List[StepInfo]: + """Get step updates for WLM managed jobs + + :param step_ids: list of job step ids + :return: list of updates for managed jobs + """ + updates: t.List[StepInfo] = [] + + qstat_out, _ = qstat(["-xml"]) + stats = [parse_qstat_jobid_xml(qstat_out, str(step_id)) for step_id in step_ids] + + for stat, step_id in zip(stats, step_ids): + if stat is None: + info = SGEStepInfo("NOTFOUND") + # Attempt to retrieve the historical record + return_code, qacct_output, _ = qacct([f"-j {step_id}"]) + num_trials = 0 + while return_code != 0 and num_trials < CONFIG.wlm_trials: + num_trials += 1 + time.sleep(CONFIG.jm_interval) + return_code, qacct_output, _ = qacct([f"-j {step_id}"]) + + if qacct_output: + failed = 
bool(int(parse_qacct_job_output(qacct_output, "failed"))) + if failed: + info.status = SmartSimStatus.STATUS_FAILED + info.returncode = 0 + else: + info.status = SmartSimStatus.STATUS_COMPLETED + info.returncode = 0 + else: # Assume if qacct did not find it, that the job completed + info.status = SmartSimStatus.STATUS_COMPLETED + info.returncode = 0 + else: + info = SGEStepInfo(stat) + + updates.append(info) + return updates + + def __str__(self) -> str: + return "SGE" diff --git a/smartsim/_core/launcher/sge/sgeParser.py b/smartsim/_core/launcher/sge/sgeParser.py new file mode 100644 index 000000000..0ee5d5c67 --- /dev/null +++ b/smartsim/_core/launcher/sge/sgeParser.py @@ -0,0 +1,92 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t +import xml.etree.ElementTree as ET + + +def parse_qsub(output: str) -> str: + """Parse qsub output and return job id. For SGE, the + output is the job id itself. + + :param output: stdout of qsub command + :returns: job id + """ + return output + + +def parse_qsub_error(output: str) -> str: + """Parse and return error output of a failed qsub command. + + :param output: stderr of qsub command + :returns: error message + """ + # look for error first + for line in output.split("\n"): + if line.startswith("qsub:"): + error = line.split(":")[1] + return error.strip() + # if no error line, take first line + for line in output.split("\n"): + return line.strip() + # if neither, present a base error message + base_err = "PBS run error" + return base_err + + +def parse_qstat_jobid_xml(output: str, job_id: str) -> t.Optional[str]: + """Parse and return output of the qstat command run with XML options + to obtain job status. 
+ + :param output: output of the qstat command in XML format + :param job_id: allocation id or job step id + :return: status + """ + + root = ET.fromstring(output) + for job_list in root.findall(".//job_list"): + job_state = job_list.find("state") + # not None construct is needed here, since element with no + # children returns 0, interpreted as False + if (job_number := job_list.find("JB_job_number")) is not None: + if job_number.text == job_id and (job_state is not None): + return job_state.text + + return None + + +def parse_qacct_job_output(output: str, field_name: str) -> t.Union[str, int]: + """Parse the output from qacct for a single job + + :param output: The raw text output from qacct + :param field_name: The name of the field to extract + """ + + for line in output.splitlines(): + if field_name in line: + return line.split()[1] + + return 1 diff --git a/smartsim/_core/launcher/slurm/slurmCommands.py b/smartsim/_core/launcher/slurm/slurmCommands.py index 839826297..e72a87af4 100644 --- a/smartsim/_core/launcher/slurm/slurmCommands.py +++ b/smartsim/_core/launcher/slurm/slurmCommands.py @@ -29,7 +29,7 @@ from ....error import LauncherError from ....log import get_logger from ...utils.helpers import expand_exe_path -from ..util.shell import execute_cmd +from ...utils.shell import execute_cmd logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/step/__init__.py b/smartsim/_core/launcher/step/__init__.py index c492f3e97..8331a18bf 100644 --- a/smartsim/_core/launcher/step/__init__.py +++ b/smartsim/_core/launcher/step/__init__.py @@ -30,5 +30,6 @@ from .lsfStep import BsubBatchStep, JsrunStep from .mpiStep import MpiexecStep, MpirunStep, OrterunStep from .pbsStep import QsubBatchStep +from .sgeStep import SgeQsubBatchStep from .slurmStep import SbatchStep, SrunStep from .step import Step diff --git a/smartsim/_core/launcher/step/dragonStep.py b/smartsim/_core/launcher/step/dragonStep.py index 036a9e565..dd93d7910 100644 --- 
a/smartsim/_core/launcher/step/dragonStep.py +++ b/smartsim/_core/launcher/step/dragonStep.py @@ -30,7 +30,11 @@ import sys import typing as t -from ...._core.schemas.dragonRequests import DragonRunRequest, request_registry +from ...._core.schemas.dragonRequests import ( + DragonRunPolicy, + DragonRunRequest, + request_registry, +) from ....error.errors import SSUnsupportedError from ....log import get_logger from ....settings import ( @@ -166,8 +170,11 @@ def _write_request_file(self) -> str: nodes = int(run_args.get("nodes", None) or 1) tasks_per_node = int(run_args.get("tasks-per-node", None) or 1) + policy = DragonRunPolicy.from_run_args(run_args) + cmd = step.get_launch_cmd() out, err = step.get_output_files() + request = DragonRunRequest( exe=cmd[0], exe_args=cmd[1:], @@ -179,6 +186,7 @@ def _write_request_file(self) -> str: current_env=os.environ, output_file=out, error_file=err, + policy=policy, ) requests.append(request_registry.to_string(request)) with open(request_file, "w", encoding="utf-8") as script_file: diff --git a/smartsim/_core/launcher/step/mpiStep.py b/smartsim/_core/launcher/step/mpiStep.py index 767486462..9ae3af2fc 100644 --- a/smartsim/_core/launcher/step/mpiStep.py +++ b/smartsim/_core/launcher/step/mpiStep.py @@ -54,7 +54,7 @@ def __init__(self, name: str, cwd: str, run_settings: RunSettings) -> None: self._set_alloc() self.run_settings = run_settings - _supported_launchers = ["PBS", "SLURM", "LSB"] + _supported_launchers = ["PBS", "SLURM", "LSB", "SGE"] @proxyable_launch_cmd def get_launch_cmd(self) -> t.List[str]: @@ -102,7 +102,10 @@ def _set_alloc(self) -> None: environment_keys = os.environ.keys() for launcher in self._supported_launchers: - jobid_field = f"{launcher.upper()}_JOBID" + if launcher == "SGE": + jobid_field = "JOB_ID" + else: + jobid_field = f"{launcher.upper()}_JOBID" if jobid_field in environment_keys: self.alloc = os.environ[jobid_field] logger.debug(f"Running on allocation {self.alloc} from {jobid_field}") diff --git 
a/smartsim/_core/launcher/step/sgeStep.py b/smartsim/_core/launcher/step/sgeStep.py new file mode 100644 index 000000000..2406b19da --- /dev/null +++ b/smartsim/_core/launcher/step/sgeStep.py @@ -0,0 +1,95 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import typing as t + +from ....log import get_logger +from ....settings import SgeQsubBatchSettings +from .step import Step + +logger = get_logger(__name__) + + +class SgeQsubBatchStep(Step): + def __init__( + self, name: str, cwd: str, batch_settings: SgeQsubBatchSettings + ) -> None: + """Initialize a Sun Grid Engine qsub step + + :param name: name of the entity to launch + :param cwd: path to launch dir + :param batch_settings: batch settings for entity + """ + super().__init__(name, cwd, batch_settings) + self.step_cmds: t.List[t.List[str]] = [] + self.managed = True + self.batch_settings = batch_settings + + def get_launch_cmd(self) -> t.List[str]: + """Get the launch command for the batch + + :return: launch command for the batch + """ + script = self._write_script() + return [self.batch_settings.batch_cmd, script] + + def add_to_batch(self, step: Step) -> None: + """Add a job step to this batch + + :param step: a job step instance e.g. MpirunStep + """ + launch_cmd = step.get_launch_cmd() + self.step_cmds.append(launch_cmd) + logger.debug(f"Added step command to batch for {step.name}") + + def _write_script(self) -> str: + """Write the batch script + + :return: batch script path after writing + """ + batch_script = self.get_step_file(ending=".sh") + output, error = self.get_output_files() + with open(batch_script, "w", encoding="utf-8") as script_file: + script_file.write(f"{self.batch_settings.shebang}\n\n") + script_file.write(f"#$ -o {output}\n") + script_file.write(f"#$ -e {error}\n") + script_file.write(f"#$ -N {self.name}\n") + script_file.write("#$ -V\n") + + # add additional qsub options + for opt in self.batch_settings.format_batch_args(): + script_file.write(f"#$ {opt}\n") + + for cmd in self.batch_settings.preamble: + script_file.write(f"{cmd}\n") + + for i, step_cmd in enumerate(self.step_cmds): + script_file.write("\n") + script_file.write(f"{' '.join((step_cmd))} &\n") + if i == len(self.step_cmds) - 1: + script_file.write("\n") + 
script_file.write("wait\n") + return batch_script diff --git a/smartsim/_core/launcher/step/step.py b/smartsim/_core/launcher/step/step.py index 2cce6e610..171254e32 100644 --- a/smartsim/_core/launcher/step/step.py +++ b/smartsim/_core/launcher/step/step.py @@ -26,6 +26,7 @@ from __future__ import annotations +import copy import functools import os.path as osp import pathlib @@ -51,7 +52,7 @@ def __init__(self, name: str, cwd: str, step_settings: SettingsBase) -> None: self.entity_name = name self.cwd = cwd self.managed = False - self.step_settings = step_settings + self.step_settings = copy.deepcopy(step_settings) self.meta: t.Dict[str, str] = {} @property diff --git a/smartsim/_core/launcher/stepInfo.py b/smartsim/_core/launcher/stepInfo.py index 875eb0322..b68527cb3 100644 --- a/smartsim/_core/launcher/stepInfo.py +++ b/smartsim/_core/launcher/stepInfo.py @@ -151,7 +151,7 @@ def __init__( class PBSStepInfo(StepInfo): # cov-pbs @property def mapping(self) -> t.Dict[str, SmartSimStatus]: - # pylint: disable=line-too-long + # pylint: disable-next=line-too-long # see http://nusc.nsu.ru/wiki/lib/exe/fetch.php/doc/pbs/PBSReferenceGuide19.2.1.pdf#M11.9.90788.PBSHeading1.81.Job.States return { "R": SmartSimStatus.STATUS_RUNNING, @@ -201,7 +201,7 @@ def __init__( class LSFBatchStepInfo(StepInfo): # cov-lsf @property def mapping(self) -> t.Dict[str, SmartSimStatus]: - # pylint: disable=line-too-long + # pylint: disable-next=line-too-long # see https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=execution-about-job-states return { "RUN": SmartSimStatus.STATUS_RUNNING, @@ -239,7 +239,7 @@ def __init__( class LSFJsrunStepInfo(StepInfo): # cov-lsf @property def mapping(self) -> t.Dict[str, SmartSimStatus]: - # pylint: disable=line-too-long + # pylint: disable-next=line-too-long # see https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=execution-about-job-states return { "Killed": SmartSimStatus.STATUS_COMPLETED, @@ -270,3 +270,77 @@ def __init__( super().__init__( 
smartsim_status, status, returncode, output=output, error=error ) + + +class SGEStepInfo(StepInfo): # cov-pbs + @property + def mapping(self) -> t.Dict[str, SmartSimStatus]: + # pylint: disable-next=line-too-long + # see https://manpages.ubuntu.com/manpages/jammy/man5/sge_status.5.html + return { + # Running states + "r": SmartSimStatus.STATUS_RUNNING, + "hr": SmartSimStatus.STATUS_RUNNING, + "t": SmartSimStatus.STATUS_RUNNING, + "Rr": SmartSimStatus.STATUS_RUNNING, + "Rt": SmartSimStatus.STATUS_RUNNING, + # Queued states + "qw": SmartSimStatus.STATUS_QUEUED, + "Rq": SmartSimStatus.STATUS_QUEUED, + "hqw": SmartSimStatus.STATUS_QUEUED, + "hRwq": SmartSimStatus.STATUS_QUEUED, + # Paused states + "s": SmartSimStatus.STATUS_PAUSED, + "ts": SmartSimStatus.STATUS_PAUSED, + "S": SmartSimStatus.STATUS_PAUSED, + "tS": SmartSimStatus.STATUS_PAUSED, + "T": SmartSimStatus.STATUS_PAUSED, + "tT": SmartSimStatus.STATUS_PAUSED, + "Rs": SmartSimStatus.STATUS_PAUSED, + "Rts": SmartSimStatus.STATUS_PAUSED, + "RS": SmartSimStatus.STATUS_PAUSED, + "RtS": SmartSimStatus.STATUS_PAUSED, + "RT": SmartSimStatus.STATUS_PAUSED, + "RtT": SmartSimStatus.STATUS_PAUSED, + # Failed states + "Eqw": SmartSimStatus.STATUS_FAILED, + "Ehqw": SmartSimStatus.STATUS_FAILED, + "EhRqw": SmartSimStatus.STATUS_FAILED, + # Finished states + "z": SmartSimStatus.STATUS_COMPLETED, + # Cancelled + "dr": SmartSimStatus.STATUS_CANCELLED, + "dt": SmartSimStatus.STATUS_CANCELLED, + "dRr": SmartSimStatus.STATUS_CANCELLED, + "dRt": SmartSimStatus.STATUS_CANCELLED, + "ds": SmartSimStatus.STATUS_CANCELLED, + "dS": SmartSimStatus.STATUS_CANCELLED, + "dT": SmartSimStatus.STATUS_CANCELLED, + "dRs": SmartSimStatus.STATUS_CANCELLED, + "dRS": SmartSimStatus.STATUS_CANCELLED, + "dRT": SmartSimStatus.STATUS_CANCELLED, + } + + def __init__( + self, + status: str = "", + returncode: t.Optional[int] = None, + output: t.Optional[str] = None, + error: t.Optional[str] = None, + ) -> None: + if status == "NOTFOUND": + if returncode is 
not None: + smartsim_status = ( + SmartSimStatus.STATUS_COMPLETED + if returncode == 0 + else SmartSimStatus.STATUS_FAILED + ) + else: + # if SGE job history is not available, and job is not in queue + smartsim_status = SmartSimStatus.STATUS_COMPLETED + returncode = 0 + else: + smartsim_status = self._get_smartsim_status(status) + super().__init__( + smartsim_status, status, returncode, output=output, error=error + ) diff --git a/smartsim/_core/launcher/taskManager.py b/smartsim/_core/launcher/taskManager.py index 60f097da6..1bc26d043 100644 --- a/smartsim/_core/launcher/taskManager.py +++ b/smartsim/_core/launcher/taskManager.py @@ -36,7 +36,7 @@ from ...error import LauncherError from ...log import ContextThread, get_logger from ..utils.helpers import check_dev_log_level -from .util.shell import execute_async_cmd, execute_cmd +from ..utils.shell import execute_async_cmd, execute_cmd logger = get_logger(__name__) VERBOSE_TM = check_dev_log_level() # pylint: disable=invalid-name diff --git a/smartsim/_core/schemas/dragonRequests.py b/smartsim/_core/schemas/dragonRequests.py index 3e384f746..487ea915a 100644 --- a/smartsim/_core/schemas/dragonRequests.py +++ b/smartsim/_core/schemas/dragonRequests.py @@ -26,9 +26,10 @@ import typing as t -from pydantic import BaseModel, Field, PositiveInt +from pydantic import BaseModel, Field, NonNegativeInt, PositiveInt, ValidationError import smartsim._core.schemas.utils as _utils +from smartsim.error.errors import SmartSimError # Black and Pylint disagree about where to put the `...` # pylint: disable=multiple-statements @@ -39,6 +40,43 @@ class DragonRequest(BaseModel): ... 
class DragonRunPolicy(BaseModel):
    """Policy specifying hardware constraints when running a Dragon job"""

    cpu_affinity: t.List[NonNegativeInt] = Field(default_factory=list)
    """List of CPU indices to which the job should be pinned"""
    gpu_affinity: t.List[NonNegativeInt] = Field(default_factory=list)
    """List of GPU indices to which the job should be pinned"""

    @staticmethod
    def from_run_args(
        run_args: t.Dict[str, t.Union[int, str, float, None]]
    ) -> "DragonRunPolicy":
        """Create a DragonRunPolicy with hardware constraints passed from
        a dictionary of run arguments

        The ``"cpu-affinity"`` and ``"gpu-affinity"`` entries are read as
        comma separated lists of device indices. A missing entry, ``None``
        or an empty string yields an empty list; blank tokens between commas
        are ignored.

        :param run_args: Dictionary of run arguments
        :returns: DragonRunPolicy instance created from the run arguments
        :raises ValueError: if a token cannot be converted to an integer
        :raises SmartSimError: if the parsed indices fail model validation
        """

        def _indices(key: str) -> t.List[int]:
            """Parse the comma separated device indices stored under ``key``"""
            raw = run_args.get(key, None)
            # Compare against None/"" explicitly: a bare integer 0 is a
            # valid device index and must not be treated as "unset".
            if raw is None or raw == "":
                return []
            # run args converted to a string must be split back into a
            # list[int]; strip *before* filtering so that whitespace-only
            # tokens (e.g. "0, ,1") are skipped instead of reaching int("")
            tokens = (token.strip() for token in str(raw).split(","))
            return [int(token) for token in tokens if token]

        gpu_affinity = _indices("gpu-affinity")
        cpu_affinity = _indices("cpu-affinity")

        try:
            return DragonRunPolicy(
                cpu_affinity=cpu_affinity,
                gpu_affinity=gpu_affinity,
            )
        except ValidationError as ex:
            raise SmartSimError("Unable to build DragonRunPolicy") from ex
2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import enum + + +class Device(enum.Enum): + CPU = "cpu" + GPU = "gpu" diff --git a/smartsim/_core/utils/__init__.py b/smartsim/_core/utils/__init__.py index 3ea928797..cddbc4ce9 100644 --- a/smartsim/_core/utils/__init__.py +++ b/smartsim/_core/utils/__init__.py @@ -29,6 +29,7 @@ colorize, delete_elements, execute_platform_cmd, + expand_exe_path, installed_redisai_backends, is_crayex_platform, ) diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py index df2c016a1..b17be763b 100644 --- a/smartsim/_core/utils/helpers.py +++ b/smartsim/_core/utils/helpers.py @@ -39,12 +39,11 @@ from pathlib import Path from shutil import which -from smartsim._core._install.builder import TRedisAIBackendStr as _TRedisAIBackendStr - if t.TYPE_CHECKING: from types import FrameType +_TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime"] _TSignalHandlerFn = t.Callable[[int, t.Optional["FrameType"]], object] @@ -230,7 +229,9 @@ def redis_install_base(backends_path: t.Optional[str] = None) -> Path: # pylint: disable-next=import-outside-toplevel from ..._core.config import CONFIG - base_path = Path(backends_path) if backends_path else CONFIG.lib_path / "backends" + base_path: Path = ( + Path(backends_path) if backends_path else CONFIG.lib_path / "backends" + ) return base_path @@ -255,10 +256,10 @@ def installed_redisai_backends( "tensorflow", "torch", "onnxruntime", - "tflite", } - return {backend for backend in backends if _installed(base_path, backend)} + installed = {backend for backend in backends if _installed(base_path, backend)} + return installed def get_ts_ms() -> int: diff --git a/smartsim/_core/utils/redis.py b/smartsim/_core/utils/redis.py index 7fa59ad83..76ff45cd5 100644 --- a/smartsim/_core/utils/redis.py +++ b/smartsim/_core/utils/redis.py @@ -39,8 +39,8 @@ from ...error import SSInternalError from ...log import get_logger from ..config import CONFIG -from ..launcher.util.shell import execute_cmd from .network import 
get_ip_from_host +from .shell import execute_cmd logging.getLogger("rediscluster").setLevel(logging.WARNING) logger = get_logger(__name__) diff --git a/smartsim/_core/launcher/util/shell.py b/smartsim/_core/utils/shell.py similarity index 97% rename from smartsim/_core/launcher/util/shell.py rename to smartsim/_core/utils/shell.py index a2b5bc76b..4cfe2998c 100644 --- a/smartsim/_core/launcher/util/shell.py +++ b/smartsim/_core/utils/shell.py @@ -30,9 +30,9 @@ import psutil -from ....error import ShellError -from ....log import get_logger -from ...utils.helpers import check_dev_log_level +from ...error import ShellError +from ...log import get_logger +from .helpers import check_dev_log_level logger = get_logger(__name__) VERBOSE_SHELL = check_dev_log_level() diff --git a/smartsim/database/orchestrator.py b/smartsim/database/orchestrator.py index f6ce0310f..e2549891a 100644 --- a/smartsim/database/orchestrator.py +++ b/smartsim/database/orchestrator.py @@ -28,6 +28,7 @@ import itertools import os.path as osp +import shutil import sys import typing as t from os import environ, getcwd, getenv @@ -41,6 +42,7 @@ from .._core.utils import db_is_active from .._core.utils.helpers import is_valid_cmd, unpack_db_identifier from .._core.utils.network import get_ip_from_host +from .._core.utils.shell import execute_cmd from ..entity import DBNode, EntityList, TelemetryConfiguration from ..error import ( SmartSimError, @@ -75,6 +77,7 @@ "pals": ["mpiexec"], "lsf": ["jsrun"], "local": [""], + "sge": ["mpirun", "mpiexec", "orterun"], } @@ -186,8 +189,6 @@ def __init__( Extra configurations for RedisAI - See https://oss.redis.com/redisai/configuration/ - :param path: path to location of ``Orchestrator`` directory :param port: TCP/IP port :param interface: network interface(s) @@ -280,14 +281,35 @@ def __init__( ) if hosts: self.set_hosts(hosts) - elif not hosts and self.run_command == "mpirun": - raise SmartSimError( - "hosts argument is required when launching Orchestrator with 
mpirun" - ) + elif not hosts: + mpilike = run_command in ["mpirun", "mpiexec", "orterun"] + if mpilike and not self._mpi_has_sge_support(): + raise SmartSimError( + ( + "hosts argument required when launching ", + "Orchestrator with mpirun", + ) + ) self._reserved_run_args: t.Dict[t.Type[RunSettings], t.List[str]] = {} self._reserved_batch_args: t.Dict[t.Type[BatchSettings], t.List[str]] = {} self._fill_reserved() + def _mpi_has_sge_support(self) -> bool: + """Check if MPI command supports SGE + + If the run command is mpirun, mpiexec, or orterun, there is a possibility + that the user is using OpenMPI with SGE grid support. In this case, hosts + do not need to be set. + + :returns: bool + """ + + if self.run_command in ["mpirun", "orterun", "mpiexec"]: + if shutil.which("ompi_info"): + _, output, _ = execute_cmd(["ompi_info"]) + return "gridengine" in output + return False + @property def db_identifier(self) -> str: """Return the DB identifier, which is common to a DB and all of its nodes diff --git a/smartsim/entity/dbobject.py b/smartsim/entity/dbobject.py index 5cb0d061f..fa9983c50 100644 --- a/smartsim/entity/dbobject.py +++ b/smartsim/entity/dbobject.py @@ -27,7 +27,8 @@ import typing as t from pathlib import Path -from .._core._install.builder import Device +from smartsim._core.types import Device + from ..error import SSUnsupportedError __all__ = ["DBObject", "DBModel", "DBScript"] diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py index cab138685..965b10db7 100644 --- a/smartsim/entity/ensemble.py +++ b/smartsim/entity/ensemble.py @@ -31,7 +31,8 @@ from tabulate import tabulate -from .._core._install.builder import Device +from smartsim._core.types import Device + from ..error import ( EntityExistsError, SmartSimError, diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py index 3f78e042c..3e8baad5c 100644 --- a/smartsim/entity/model.py +++ b/smartsim/entity/model.py @@ -27,6 +27,7 @@ from __future__ import annotations import 
itertools +import numbers import re import sys import typing as t @@ -34,7 +35,8 @@ from os import getcwd from os import path as osp -from .._core._install.builder import Device +from smartsim._core.types import Device + from .._core.utils.helpers import cat_arg_and_value from ..error import EntityExistsError, SSUnsupportedError from ..log import get_logger @@ -46,6 +48,25 @@ logger = get_logger(__name__) +def _parse_model_parameters(params_dict: t.Dict[str, t.Any]) -> t.Dict[str, str]: + """Convert the values in a params dict to strings + :raises TypeError: if params are of the wrong type + :return: param dictionary with values and keys cast as strings + """ + param_names: t.List[str] = [] + parameters: t.List[str] = [] + for name, val in params_dict.items(): + param_names.append(name) + if isinstance(val, (str, numbers.Number)): + parameters.append(str(val)) + else: + raise TypeError( + "Incorrect type for model parameters\n" + + "Must be numeric value or string." + ) + return dict(zip(param_names, parameters)) + + class Model(SmartSimEntity): def __init__( self, @@ -70,7 +91,7 @@ def __init__( model as a batch job """ super().__init__(name, str(path), run_settings) - self.params = params + self.params = _parse_model_parameters(params) self.params_as_args = params_as_args self.incoming_entities: t.List[SmartSimEntity] = [] self._key_prefixing_enabled = False diff --git a/smartsim/error/__init__.py b/smartsim/error/__init__.py index 3a40548e7..c7122fe42 100644 --- a/smartsim/error/__init__.py +++ b/smartsim/error/__init__.py @@ -28,6 +28,7 @@ AllocationError, EntityExistsError, LauncherError, + LauncherUnsupportedFeature, ParameterWriterError, ShellError, SmartSimError, diff --git a/smartsim/error/errors.py b/smartsim/error/errors.py index 333258a34..0cb38d7e6 100644 --- a/smartsim/error/errors.py +++ b/smartsim/error/errors.py @@ -108,6 +108,10 @@ class LauncherError(SSInternalError): """Raised when there is an error in the launcher""" +class 
LauncherUnsupportedFeature(LauncherError): + """Raised when the launcher does not support a given method""" + + class AllocationError(LauncherError): """Raised when there is a problem with the user WLM allocation""" diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 6b9d6a1fb..607a90ae1 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -144,7 +144,7 @@ def __init__( :param name: name for the ``Experiment`` :param exp_path: path to location of ``Experiment`` directory :param launcher: type of launcher being used, options are "slurm", "pbs", - "lsf", or "local". If set to "auto", + "lsf", "sge", or "local". If set to "auto", an attempt will be made to find an available launcher on the system. """ diff --git a/smartsim/ml/tf/__init__.py b/smartsim/ml/tf/__init__.py index 46d89d733..ee791ea98 100644 --- a/smartsim/ml/tf/__init__.py +++ b/smartsim/ml/tf/__init__.py @@ -31,23 +31,12 @@ logger = get_logger(__name__) vers = Versioner() -TF_VERSION = vers.TENSORFLOW try: import tensorflow as tf except ImportError: # pragma: no cover raise ModuleNotFoundError( - f"TensorFlow {TF_VERSION} is not installed. " - "Please install it to use smartsim.ml.tf" - ) from None - -try: - installed_tf = Version_(tf.__version__) - assert installed_tf >= TF_VERSION -except AssertionError: # pragma: no cover - raise SmartSimError( - f"TensorFlow >= {TF_VERSION} is required for smartsim. " - f"tf, you have {tf.__version__}" + f"TensorFlow is not installed. 
Please install it to use smartsim.ml.tf" ) from None diff --git a/smartsim/ml/tf/utils.py b/smartsim/ml/tf/utils.py index cf69b65e5..4e45f1847 100644 --- a/smartsim/ml/tf/utils.py +++ b/smartsim/ml/tf/utils.py @@ -29,7 +29,7 @@ import keras import tensorflow as tf -from tensorflow.python.framework.convert_to_constants import ( +from tensorflow.python.framework.convert_to_constants import ( # type: ignore[import-not-found,unused-ignore] convert_variables_to_constants_v2, ) @@ -62,7 +62,7 @@ def freeze_model( tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype) ) - frozen_func = convert_variables_to_constants_v2(full_model) + frozen_func = convert_variables_to_constants_v2(full_model) # type: ignore[no-untyped-call,unused-ignore] frozen_func.graph.as_graph_def() input_names = [x.name.split(":")[0] for x in frozen_func.inputs] @@ -97,7 +97,7 @@ def serialize_model(model: keras.Model) -> t.Tuple[str, t.List[str], t.List[str] tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype) ) - frozen_func = convert_variables_to_constants_v2(full_model) + frozen_func = convert_variables_to_constants_v2(full_model) # type: ignore[no-untyped-call,unused-ignore] frozen_func.graph.as_graph_def() input_names = [x.name.split(":")[0] for x in frozen_func.inputs] diff --git a/smartsim/settings/__init__.py b/smartsim/settings/__init__.py index 6e8f0bc96..8052121e2 100644 --- a/smartsim/settings/__init__.py +++ b/smartsim/settings/__init__.py @@ -32,6 +32,7 @@ from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings from .palsSettings import PalsMpiexecSettings from .pbsSettings import QsubBatchSettings +from .sgeSettings import SgeQsubBatchSettings from .slurmSettings import SbatchSettings, SrunSettings __all__ = [ @@ -45,6 +46,7 @@ "RunSettings", "SettingsBase", "SbatchSettings", + "SgeQsubBatchSettings", "SrunSettings", "PalsMpiexecSettings", "DragonRunSettings", diff --git a/smartsim/settings/base.py b/smartsim/settings/base.py index 6373b52fd..da3edb491 
@override
def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None:
    """Specify the node feature for this job

    A string is split on whitespace into individual features; a list must
    contain only strings. The features are stored comma separated under the
    ``node-feature`` run argument.

    :param feature_list: a collection of strings representing the required
     node features. Currently supported node features are: "gpu"
    :raises TypeError: if a list entry is not a string
    """
    if isinstance(feature_list, str):
        # split() without arguments already ignores surrounding whitespace
        feature_list = feature_list.split()
    elif any(not isinstance(feature, str) for feature in feature_list):
        raise TypeError("feature_list must be string or list of strings")

    joined_features = ",".join(feature_list)
    self.run_args["node-feature"] = joined_features


def set_cpu_affinity(self, devices: t.List[int]) -> None:
    """Set the CPU affinity for this job

    The indices are stored comma separated under the ``cpu-affinity``
    run argument.

    :param devices: list of CPU indices to execute on
    """
    self.run_args["cpu-affinity"] = ",".join(map(str, devices))


def set_gpu_affinity(self, devices: t.List[int]) -> None:
    """Set the GPU affinity for this job

    The indices are stored comma separated under the ``gpu-affinity``
    run argument.

    :param devices: list of GPU indices to execute on.
    """
    self.run_args["gpu-affinity"] = ",".join(map(str, devices))
Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class SgeQsubBatchSettings(BatchSettings):
    """Batch settings for jobs submitted with ``qsub`` on SGE.

    Resource requests are kept in ``resources``. When the batch arguments
    are formatted, ``ncpus`` and ``pe_type`` are merged into one ``-pe``
    switch, every other resource becomes ``-l name=value``, context
    variables are emitted as recorded and environment variables as
    ``-v name=value``.
    """

    def __init__(
        self,
        time: t.Optional[str] = None,
        ncpus: t.Optional[int] = None,
        pe_type: t.Optional[str] = None,
        account: t.Optional[str] = None,
        shebang: str = "#!/bin/bash -l",
        resources: t.Optional[t.Dict[str, t.Union[str, int]]] = None,
        batch_args: t.Optional[t.Dict[str, t.Optional[str]]] = None,
        **kwargs: t.Any,
    ):
        """Specify SGE batch parameters for a job

        :param time: walltime for batch job
        :param ncpus: number of cpus per node
        :param pe_type: type of parallel environment
        :param account: account for batch launch
        :param shebang: shebang (shell) for the batch job
        :param resources: overrides for resource arguments
        :param batch_args: overrides for SGE batch arguments
        :param kwargs: forwarded to ``BatchSettings``; a ``nodes`` entry is
            reset to 0 because SGE cannot request a node count, and queues
            are not supported (``set_queue`` raises)
        """

        if "nodes" in kwargs:
            kwargs["nodes"] = 0

        self.resources = resources or {}
        if ncpus:
            self.set_ncpus(ncpus)
        if pe_type:
            self.set_pe_type(pe_type)
        self.set_shebang(shebang)

        # time, nodes, and account set in parent class init
        super().__init__(
            "qsub",
            batch_args=batch_args,
            account=account,
            time=time,
            **kwargs,
        )

        self._context_variables: t.List[str] = []
        self._env_vars: t.Dict[str, str] = {}

    @property
    def resources(self) -> t.Dict[str, t.Union[str, int]]:
        """A copy of the resource requests; mutate through ``set_resource``"""
        return self._resources.copy()

    @resources.setter
    def resources(self, resources: t.Dict[str, t.Union[str, int]]) -> None:
        self._sanity_check_resources(resources)
        self._resources = resources.copy()

    def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None:
        """Unsupported for SGE

        :param host_list: ignored
        :raises LauncherUnsupportedFeature: always
        """
        raise LauncherUnsupportedFeature(
            "SGE does not support requesting specific hosts in batch jobs"
        )

    def set_queue(self, queue: str) -> None:
        """Unsupported for SGE

        :param queue: ignored
        :raises LauncherUnsupportedFeature: always
        """
        raise LauncherUnsupportedFeature("SGE does not support specifying queues")

    def set_shebang(self, shebang: str) -> None:
        """Set the shebang (shell) for the batch job

        :param shebang: The shebang used to interpret the rest of script
          (e.g. #!/bin/bash)
        """
        self.shebang = shebang

    def set_walltime(self, walltime: str) -> None:
        """Set the walltime of the job

        format = "HH:MM:SS"

        The value is stored as the ``h_rt`` resource and replaces any
        ``h_rt`` entry already present in ``resources``. A falsy value
        leaves the resources untouched.

        :param walltime: wall time
        """
        if walltime:
            self.set_resource("h_rt", walltime)

    def set_nodes(self, num_nodes: t.Optional[int]) -> None:
        """Set the number of nodes, invalid for SGE

        :param num_nodes: Number of nodes, any integer other than 0 is invalid
        :raises LauncherUnsupportedFeature: if ``num_nodes`` is truthy
        """
        if num_nodes:
            raise LauncherUnsupportedFeature(
                "SGE does not support setting the number of nodes"
            )

    def set_ncpus(self, num_cpus: t.Union[int, str]) -> None:
        """Set the number of cpus, stored as the ``ncpus`` resource

        :param num_cpus: number of cpus; converted with ``int``
        """
        self.set_resource("ncpus", int(num_cpus))

    def set_ngpus(self, num_gpus: t.Union[int, str]) -> None:
        """Set the number of GPUs, stored as the ``gpu`` resource

        :param num_gpus: number of GPUs
        """
        self.set_resource("gpu", num_gpus)

    def set_account(self, account: str) -> None:
        """Set the account for this batch job

        :param account: account id; a falsy value is ignored
        """
        if account:
            self.batch_args["A"] = str(account)

    def set_project(self, project: str) -> None:
        """Set the project for this batch job

        :param project: project id; a falsy value is ignored
        """
        if project:
            self.batch_args["P"] = str(project)

    def update_context_variables(
        self,
        action: t.Literal["ac", "sc", "dc"],
        var_name: str,
        value: t.Optional[t.Union[int, str]] = None,
    ) -> None:
        """
        Add, set, or delete context variables

        Configure any context variables using SGE's -ac, -sc, and -dc
        qsub switches. These modifications are appended each time this
        method is called, so the order does matter

        :param action: Add, set, or delete a context variable (ac, dc, or sc)
        :param var_name: The name of the variable to set
        :param value: The value of the variable; ``None`` and ``""`` mean
            that no value is given
        :raises ValueError: if ``action`` is not one of ac, sc, dc
        :raises SSConfigError: if a value is given together with ``dc``
        """
        if action not in ["ac", "sc", "dc"]:
            raise ValueError("The action argument must be ac, sc, or dc")

        # 0 is a legitimate value, so do not rely on truthiness here
        has_value = value is not None and value != ""
        if action == "dc" and has_value:
            raise SSConfigError("When using the 'dc' action, value should not be set")

        command = f"-{action} {var_name}"
        if has_value:
            command += f"={value}"
        self._context_variables.append(command)

    def set_hyperthreading(self, enable: bool = True) -> None:
        """Enable or disable hyperthreading

        Stored as the ``threads`` resource (1 when enabled, 0 otherwise).

        :param enable: Enable (True) or disable (False) hyperthreading
        """
        self.set_resource("threads", int(enable))

    def set_memory_per_pe(self, memory_spec: str) -> None:
        """Set the amount of memory per processing element

        Stored as the ``mem`` resource.

        :param memory_spec: The amount of memory per PE (e.g. 2G)
        """
        self.set_resource("mem", memory_spec)

    def set_pe_type(self, pe_type: str) -> None:
        """Set the parallel environment

        :param pe_type: parallel environment identifier (e.g. mpi or smp);
            a falsy value is ignored
        """
        if pe_type:
            self.set_resource("pe_type", pe_type)

    def set_threads_per_pe(self, threads_per_core: int) -> None:
        """Sets the number of threads per processing element

        Recorded as the ``OMP_NUM_THREADS`` environment variable of the job.

        :param threads_per_core: Number of threads per core
        """

        self._env_vars["OMP_NUM_THREADS"] = str(threads_per_core)

    def set_resource(self, resource_name: str, value: t.Union[str, int]) -> None:
        """Set a resource value for the SGE batch

        An existing entry with the same name is replaced.

        :param resource_name: name of resource, e.g. h_rt
        :param value: value
        :raises TypeError: if the name is not a str or the value is neither
            str nor int
        """
        updated_dict = self.resources  # the property hands out a copy
        updated_dict.update({resource_name: value})
        # the property setter validates before storing
        self.resources = updated_dict

    def format_batch_args(self) -> t.List[str]:
        """Get the formatted batch arguments for a preview

        The resource switches come first, followed by one ``-opt value``
        entry per batch argument.

        :return: batch arguments for SGE
        :raises ValueError: if options are supplied without values
        """
        opts = self._create_resource_list()
        for opt, value in self.batch_args.items():
            prefix = "-"
            if not value:
                raise ValueError("SGE options without values are not allowed")
            opts += [" ".join((prefix + opt, str(value)))]
        return opts

    def _sanity_check_resources(
        self, resources: t.Optional[t.Dict[str, t.Union[str, int]]] = None
    ) -> None:
        """Check that resources are correctly formatted

        :param resources: mapping to check; the stored resources are checked
            when this is not a dict
        :raises TypeError: if a key is not a str or a value is neither str
            nor int
        """
        # Note: isinstance check here to avoid collision with default
        checked_resources = resources if isinstance(resources, dict) else self.resources

        for key, value in checked_resources.items():
            if not isinstance(key, str):
                raise TypeError(
                    f"The type of {key=} is {type(key)}. Only str is allowed."
                )
            if not isinstance(value, (str, int)):
                raise TypeError(
                    f"The value associated with {key=} is {type(value)}. Only int "
                    "and str are allowed."
                )

    def _create_resource_list(self) -> t.List[str]:
        """Translate resources, context variables and env vars into switches

        :return: list of formatted ``qsub`` switches
        :raises SSConfigError: if only one of ``ncpus`` and ``pe_type`` is set
        """
        self._sanity_check_resources()
        res = []

        # Pop off some specific keywords that need to be treated separately
        resources = self.resources  # Note this is a copy so not modifying original

        # Construct the configuration of the parallel environment
        ncpus = resources.pop("ncpus", None)
        pe_type = resources.pop("pe_type", None)
        if (pe_type is None and ncpus) or (pe_type and ncpus is None):
            msg = f"{ncpus=} and {pe_type=} must both be set. "
            msg += "Call set_ncpus and/or set_pe_type."
            raise SSConfigError(msg)

        if pe_type and ncpus:
            res += [f"-pe {pe_type} {ncpus}"]

        # Deal with context variables
        for context_variable in self._context_variables:
            res += [context_variable]

        # All other "standard" resource specs
        for resource, value in resources.items():
            res += [f"-l {resource}={value}"]

        # Set any environment variables
        for key, value in self._env_vars.items():
            res += [f"-v {key}={value}"]
        return res
for model execution" ) + parser.add_argument( + "--num-devices", + type=int, + default=1, + help="Number of devices to set the model on", + ) args = parser.parse_args() - run(args.device) + run(args.device, args.num_devices) diff --git a/tests/backends/test_cli_mini_exp.py b/tests/backends/test_cli_mini_exp.py index 2fde2ff5f..3379bf2ee 100644 --- a/tests/backends/test_cli_mini_exp.py +++ b/tests/backends/test_cli_mini_exp.py @@ -32,6 +32,7 @@ import smartsim._core._cli.validate import smartsim._core._install.builder as build +from smartsim._core._install.platform import Device from smartsim._core.utils.helpers import installed_redisai_backends sklearn_available = True @@ -79,7 +80,7 @@ def _mock_make_managed_local_orc(*a, **kw): location=test_dir, port=db_port, # Always test on CPU, heads don't always have GPU - device=build.Device.CPU, + device=Device.CPU, # Test the backends the dev has installed with_tf="tensorflow" in backends, with_pt="torch" in backends, diff --git a/tests/backends/test_torch.py b/tests/backends/test_torch.py index c995f76ca..6aff6b0ba 100644 --- a/tests/backends/test_torch.py +++ b/tests/backends/test_torch.py @@ -65,9 +65,11 @@ def test_torch_model_and_script( db = prepare_db(single_db).orchestrator wlm_experiment.reconnect_orchestrator(db.checkpoint_file) test_device = mlutils.get_test_device() + test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1 run_settings = wlm_experiment.create_run_settings( - "python", f"run_torch.py --device={test_device}" + "python", + ["run_torch.py", f"--device={test_device}", f"--num-devices={test_num_gpus}"], ) if wlmutils.get_test_launcher() != "local": run_settings.set_tasks(1) diff --git a/tests/install/test_build.py b/tests/install/test_build.py new file mode 100644 index 000000000..f8a5c4896 --- /dev/null +++ b/tests/install/test_build.py @@ -0,0 +1,148 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import operator + +import pytest + +from smartsim._core._cli.build import parse_requirement +from smartsim._core._install.buildenv import Version_ + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +_SUPPORTED_OPERATORS = ("==", ">=", ">", "<=", "<") + + +@pytest.mark.parametrize( + "spec, name, pin", + ( + pytest.param("foo", "foo", None, id="Just Name"), + pytest.param("foo==1", "foo", "==1", id="With Major"), + pytest.param("foo==1.2", "foo", "==1.2", id="With Minor"), + pytest.param("foo==1.2.3", "foo", "==1.2.3", id="With Patch"), + pytest.param("foo[with-extras]==1.2.3", "foo", "==1.2.3", id="With Extra"), + pytest.param( + "foo[with,many,extras]==1.2.3", "foo", "==1.2.3", id="With Many Extras" + ), + *( + pytest.param( + f"foo{symbol}1.2.3{tag}", + "foo", + f"{symbol}1.2.3{tag}", + id=f"{symbol=} | {tag=}", + ) + for symbol in _SUPPORTED_OPERATORS + for tag in ("", "+cuda", "+rocm", "+cpu") + ), + ), +) +def test_parse_requirement_name_and_version(spec, name, pin): + p_name, p_pin, _ = parse_requirement(spec) + assert p_name == name + assert p_pin == pin + + +# fmt: off +@pytest.mark.parametrize( + "spec, ver, should_pass", + ( + pytest.param("foo" , Version_("1.2.3") , True, id="No spec"), + # EQ -------------------------------------------------------------------------- + pytest.param("foo==1.2.3" , Version_("1.2.3") , True, id="EQ Spec, EQ Version"), + pytest.param("foo==1.2.3" , Version_("1.2.5") , False, id="EQ Spec, GT Version"), + pytest.param("foo==1.2.3" , Version_("1.2.2") , False, id="EQ Spec, LT Version"), + pytest.param("foo==1.2.3+rocm", Version_("1.2.3+rocm"), True, id="EQ Spec, Compatible Version with suffix"), + pytest.param("foo==1.2.3" , Version_("1.2.3+cuda"), False, id="EQ Spec, Compatible Version, Extra Suffix"), + pytest.param("foo==1.2.3+cuda", Version_("1.2.3") , False, id="EQ Spec, Compatible Version, Missing Suffix"), + pytest.param("foo==1.2.3+cuda", Version_("1.2.3+rocm"), False, 
id="EQ Spec, Compatible Version, Mismatched Suffix"), + # LT -------------------------------------------------------------------------- + pytest.param("foo<1.2.3" , Version_("1.2.3") , False, id="LT Spec, EQ Version"), + pytest.param("foo<1.2.3" , Version_("1.2.5") , False, id="LT Spec, GT Version"), + pytest.param("foo<1.2.3" , Version_("1.2.2") , True, id="LT Spec, LT Version"), + pytest.param("foo<1.2.3+rocm" , Version_("1.2.2+rocm"), True, id="LT Spec, Compatible Version with suffix"), + pytest.param("foo<1.2.3" , Version_("1.2.2+cuda"), False, id="LT Spec, Compatible Version, Extra Suffix"), + pytest.param("foo<1.2.3+cuda" , Version_("1.2.2") , False, id="LT Spec, Compatible Version, Missing Suffix"), + pytest.param("foo<1.2.3+cuda" , Version_("1.2.2+rocm"), False, id="LT Spec, Compatible Version, Mismatched Suffix"), + # LE -------------------------------------------------------------------------- + pytest.param("foo<=1.2.3" , Version_("1.2.3") , True, id="LE Spec, EQ Version"), + pytest.param("foo<=1.2.3" , Version_("1.2.5") , False, id="LE Spec, GT Version"), + pytest.param("foo<=1.2.3" , Version_("1.2.2") , True, id="LE Spec, LT Version"), + pytest.param("foo<=1.2.3+rocm", Version_("1.2.3+rocm"), True, id="LE Spec, Compatible Version with suffix"), + pytest.param("foo<=1.2.3" , Version_("1.2.3+cuda"), False, id="LE Spec, Compatible Version, Extra Suffix"), + pytest.param("foo<=1.2.3+cuda", Version_("1.2.3") , False, id="LE Spec, Compatible Version, Missing Suffix"), + pytest.param("foo<=1.2.3+cuda", Version_("1.2.3+rocm"), False, id="LE Spec, Compatible Version, Mismatched Suffix"), + # GT -------------------------------------------------------------------------- + pytest.param("foo>1.2.3" , Version_("1.2.3") , False, id="GT Spec, EQ Version"), + pytest.param("foo>1.2.3" , Version_("1.2.5") , True, id="GT Spec, GT Version"), + pytest.param("foo>1.2.3" , Version_("1.2.2") , False, id="GT Spec, LT Version"), + pytest.param("foo>1.2.3+rocm" , 
Version_("1.2.4+rocm"), True, id="GT Spec, Compatible Version with suffix"), + pytest.param("foo>1.2.3" , Version_("1.2.4+cuda"), False, id="GT Spec, Compatible Version, Extra Suffix"), + pytest.param("foo>1.2.3+cuda" , Version_("1.2.4") , False, id="GT Spec, Compatible Version, Missing Suffix"), + pytest.param("foo>1.2.3+cuda" , Version_("1.2.4+rocm"), False, id="GT Spec, Compatible Version, Mismatched Suffix"), + # GE -------------------------------------------------------------------------- + pytest.param("foo>=1.2.3" , Version_("1.2.3") , True, id="GE Spec, EQ Version"), + pytest.param("foo>=1.2.3" , Version_("1.2.5") , True, id="GE Spec, GT Version"), + pytest.param("foo>=1.2.3" , Version_("1.2.2") , False, id="GE Spec, LT Version"), + pytest.param("foo>=1.2.3+rocm", Version_("1.2.3+rocm"), True, id="GE Spec, Compatible Version with suffix"), + pytest.param("foo>=1.2.3" , Version_("1.2.3+cuda"), False, id="GE Spec, Compatible Version, Extra Suffix"), + pytest.param("foo>=1.2.3+cuda", Version_("1.2.3") , False, id="GE Spec, Compatible Version, Missing Suffix"), + pytest.param("foo>=1.2.3+cuda", Version_("1.2.3+rocm"), False, id="GE Spec, Compatible Version, Mismatched Suffix"), + ) +) +# fmt: on +def test_parse_requirement_comparison_fn(spec, ver, should_pass): + _, _, cmp = parse_requirement(spec) + assert cmp(ver) == should_pass + + +@pytest.mark.parametrize( + "spec, ctx", + ( + *( + pytest.param( + f"thing{symbol}", + pytest.raises(ValueError, match="Invalid requirement string:"), + id=f"No version w/ operator {symbol}", + ) + for symbol in _SUPPORTED_OPERATORS + ), + pytest.param( + "thing>=>1.2.3", + pytest.raises(ValueError, match="Invalid requirement string:"), + id="Operator too long", + ), + pytest.param( + "thing<>1.2.3", + pytest.raises(ValueError, match="Unrecognized comparison operator: <>"), + id="Nonsense operator", + ), + ), +) +def test_parse_requirement_errors_on_invalid_spec(spec, ctx): + with ctx: + parse_requirement(spec) diff --git 
a/tests/install/test_buildenv.py b/tests/install/test_buildenv.py index 21b9a49b8..a3964d413 100644 --- a/tests/install/test_buildenv.py +++ b/tests/install/test_buildenv.py @@ -25,8 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import packaging import pytest -from pkg_resources import packaging # type: ignore from smartsim._core._install.buildenv import Version_ @@ -71,19 +71,32 @@ def test_version_equality_ne(): assert v1 != v2 - -def test_version_bad_input(): + # def test_version_bad_input(): """Test behavior when passing an invalid version string""" - v1 = Version_("abcdefg") + version = Version_("1") + assert version.major == 1 + with pytest.raises((IndexError, packaging.version.InvalidVersion)) as ex: + version.minor - # todo: fix behavior to ensure versions are valid. - assert v1 + version = Version_("2.") + with pytest.raises((IndexError, packaging.version.InvalidVersion)) as ex: + version.major + + version = Version_("3.0.") + + with pytest.raises((IndexError, packaging.version.InvalidVersion)) as ex: + version.major + + version = Version_("3.1.a") + assert version.major == 3 + assert version.minor == 1 + with pytest.raises((IndexError, packaging.version.InvalidVersion)) as ex: + version.patch def test_version_bad_parse_fail(): """Test behavior when trying to parse with an invalid input string""" - v1 = Version_("abcdefg") - # todo: ensure we can't take invalid input and have this IndexError occur. + version = Version_("abcdefg") with pytest.raises((IndexError, packaging.version.InvalidVersion)) as ex: - _ = v1.minor + version.major diff --git a/tests/install/test_builder.py b/tests/install/test_builder.py deleted file mode 100644 index feaf7e54f..000000000 --- a/tests/install/test_builder.py +++ /dev/null @@ -1,404 +0,0 @@ -# BSD 2-Clause License -# -# Copyright (c) 2021-2024, Hewlett Packard Enterprise -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -import functools -import pathlib -import textwrap -import time - -import pytest - -import smartsim._core._install.builder as build -from smartsim._core._install.buildenv import RedisAIVersion - -# The tests in this file belong to the group_a group -pytestmark = pytest.mark.group_a - -RAI_VERSIONS = RedisAIVersion("1.2.7") - -for_each_device = pytest.mark.parametrize( - "device", [build.Device.CPU, build.Device.GPU] -) - -_toggle_build_optional_backend = lambda backend: pytest.mark.parametrize( - f"build_{backend}", - [ - pytest.param(switch, id=f"with{'' if switch else 'out'}-{backend}") - for switch in (True, False) - ], -) -toggle_build_tf = _toggle_build_optional_backend("tf") -toggle_build_pt = _toggle_build_optional_backend("pt") -toggle_build_ort = _toggle_build_optional_backend("ort") - - -@pytest.mark.parametrize( - "mock_os", [pytest.param(os_, id=f"os='{os_}'") for os_ in ("Windows", "Java", "")] -) -def test_os_enum_raises_on_unsupported(mock_os): - with pytest.raises(build.BuildError, match="operating system") as err_info: - build.OperatingSystem.from_str(mock_os) - - -@pytest.mark.parametrize( - "mock_arch", - [ - pytest.param(arch_, id=f"arch='{arch_}'") - for arch_ in ("i386", "i686", "i86pc", "aarch64", "armv7l", "") - ], -) -def test_arch_enum_raises_on_unsupported(mock_arch): - with pytest.raises(build.BuildError, match="architecture"): - build.Architecture.from_str(mock_arch) - - -@pytest.fixture -def p_test_dir(test_dir): - yield pathlib.Path(test_dir).resolve() - - -@for_each_device -def test_rai_builder_raises_if_attempting_to_place_deps_when_build_dir_dne( - monkeypatch, p_test_dir, device -): - monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None) - monkeypatch.setattr( - build.RedisAIBuilder, - "rai_build_path", - property(lambda self: p_test_dir / "path/to/dir/that/dne"), - ) - rai_builder = build.RedisAIBuilder() - with pytest.raises(build.BuildError, match=r"build directory not found"): - 
rai_builder._fetch_deps_for(device) - - -@for_each_device -def test_rai_builder_raises_if_attempting_to_place_deps_in_nonempty_dir( - monkeypatch, p_test_dir, device -): - (p_test_dir / "some_file.txt").touch() - monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None) - monkeypatch.setattr( - build.RedisAIBuilder, "rai_build_path", property(lambda self: p_test_dir) - ) - monkeypatch.setattr( - build.RedisAIBuilder, "get_deps_dir_path_for", lambda *a, **kw: p_test_dir - ) - rai_builder = build.RedisAIBuilder() - - with pytest.raises(build.BuildError, match=r"is not empty"): - rai_builder._fetch_deps_for(device) - - -invalid_build_arm64 = [ - dict(build_tf=True, build_onnx=True), - dict(build_tf=False, build_onnx=True), - dict(build_tf=True, build_onnx=False), -] -invalid_build_ids = [ - ",".join([f"{key}={value}" for key, value in d.items()]) - for d in invalid_build_arm64 -] - - -@pytest.mark.parametrize("build_options", invalid_build_arm64, ids=invalid_build_ids) -def test_rai_builder_raises_if_unsupported_deps_on_arm64(build_options): - with pytest.raises(build.BuildError, match=r"are not supported on.*ARM64"): - build.RedisAIBuilder( - _os=build.OperatingSystem.DARWIN, - architecture=build.Architecture.ARM64, - **build_options, - ) - - -def _confirm_inst_presence(type_, should_be_present, seq): - expected_num_occurrences = 1 if should_be_present else 0 - occurrences = filter(lambda item: isinstance(item, type_), seq) - return expected_num_occurrences == len(tuple(occurrences)) - - -# Helper functions to check for the presence (or absence) of a -# ``_RAIBuildDependency`` dependency in a list of dependencies that need to be -# fetched by a ``RedisAIBuilder`` instance -dlpack_dep_presence = functools.partial( - _confirm_inst_presence, build._DLPackRepository, True -) -pt_dep_presence = functools.partial(_confirm_inst_presence, build._PTArchive) -tf_dep_presence = functools.partial(_confirm_inst_presence, build._TFArchive) -ort_dep_presence = 
functools.partial(_confirm_inst_presence, build._ORTArchive) - - -@for_each_device -@toggle_build_tf -@toggle_build_pt -@toggle_build_ort -def test_rai_builder_will_add_dep_if_backend_requested_wo_duplicates( - monkeypatch, device, build_tf, build_pt, build_ort -): - monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None) - - rai_builder = build.RedisAIBuilder( - build_tf=build_tf, build_torch=build_pt, build_onnx=build_ort - ) - requested_backends = rai_builder._get_deps_to_fetch_for(build.Device(device)) - assert dlpack_dep_presence(requested_backends) - assert tf_dep_presence(build_tf, requested_backends) - assert pt_dep_presence(build_pt, requested_backends) - assert ort_dep_presence(build_ort, requested_backends) - - -@for_each_device -@toggle_build_tf -@toggle_build_pt -def test_rai_builder_will_not_add_dep_if_custom_dep_path_provided( - monkeypatch, device, p_test_dir, build_tf, build_pt -): - monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None) - mock_ml_lib = p_test_dir / "some/ml/lib" - mock_ml_lib.mkdir(parents=True) - rai_builder = build.RedisAIBuilder( - build_tf=build_tf, - build_torch=build_pt, - build_onnx=False, - libtf_dir=str(mock_ml_lib if build_tf else ""), - torch_dir=str(mock_ml_lib if build_pt else ""), - ) - requested_backends = rai_builder._get_deps_to_fetch_for(device) - assert dlpack_dep_presence(requested_backends) - assert tf_dep_presence(False, requested_backends) - assert pt_dep_presence(False, requested_backends) - assert ort_dep_presence(False, requested_backends) - assert len(requested_backends) == 1 - - -def test_rai_builder_raises_if_it_fetches_an_unexpected_number_of_ml_deps( - monkeypatch, p_test_dir -): - monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None) - monkeypatch.setattr( - build.RedisAIBuilder, "rai_build_path", property(lambda self: p_test_dir) - ) - monkeypatch.setattr( - build, - "_place_rai_dep_at", - lambda target, verbose: lambda dep: 
target - / "whoops_all_ml_deps_extract_to_a_dir_with_this_name", - ) - rai_builder = build.RedisAIBuilder(build_tf=True, build_torch=True, build_onnx=True) - with pytest.raises( - build.BuildError, - match=r"Expected to place \d+ dependencies, but only found \d+", - ): - rai_builder._fetch_deps_for(build.Device.CPU) - - -def test_threaded_map(): - def _some_io_op(x): - return x * x - - assert (0, 1, 4, 9, 16) == tuple(build._threaded_map(_some_io_op, range(5))) - - -def test_threaded_map_returns_early_if_nothing_to_map(): - sleep_duration = 60 - - def _some_long_io_op(_): - time.sleep(sleep_duration) - - start = time.time() - build._threaded_map(_some_long_io_op, []) - end = time.time() - assert end - start < sleep_duration - - -def test_correct_pt_variant_os(): - # Check that all Linux variants return Linux - for linux_variant in build.OperatingSystem.LINUX.value: - os_ = build.OperatingSystem.from_str(linux_variant) - assert build._choose_pt_variant(os_) == build._PTArchiveLinux - - # Check that ARM64 and X86_64 Mac OSX return the Mac variant - all_archs = (build.Architecture.ARM64, build.Architecture.X64) - for arch in all_archs: - os_ = build.OperatingSystem.DARWIN - assert build._choose_pt_variant(os_) == build._PTArchiveMacOSX - - -def test_PTArchiveMacOSX_url(): - arch = build.Architecture.X64 - pt_version = RAI_VERSIONS.torch - - pt_linux_cpu = build._PTArchiveLinux( - build.Architecture.X64, build.Device.CPU, pt_version, False - ) - x64_prefix = "https://download.pytorch.org/libtorch/" - assert x64_prefix in pt_linux_cpu.url - - pt_macosx_cpu = build._PTArchiveMacOSX( - build.Architecture.ARM64, build.Device.CPU, pt_version, False - ) - arm64_prefix = "https://github.com/CrayLabs/ml_lib_builder/releases/download/" - assert arm64_prefix in pt_macosx_cpu.url - - -def test_PTArchiveMacOSX_gpu_error(): - with pytest.raises(build.BuildError, match="support GPU on Mac OSX"): - build._PTArchiveMacOSX( - build.Architecture.ARM64, build.Device.GPU, 
RAI_VERSIONS.torch, False - ).url - - -def test_valid_platforms(): - assert build.RedisAIBuilder( - _os=build.OperatingSystem.LINUX, - architecture=build.Architecture.X64, - build_tf=True, - build_torch=True, - build_onnx=True, - ) - assert build.RedisAIBuilder( - _os=build.OperatingSystem.DARWIN, - architecture=build.Architecture.X64, - build_tf=True, - build_torch=True, - build_onnx=False, - ) - assert build.RedisAIBuilder( - _os=build.OperatingSystem.DARWIN, - architecture=build.Architecture.X64, - build_tf=False, - build_torch=True, - build_onnx=False, - ) - - -@pytest.mark.parametrize( - "plat,cmd,expected_cmd", - [ - # Bare Word - pytest.param( - build.Platform(build.OperatingSystem.LINUX, build.Architecture.X64), - ["git", "clone", "my-repo"], - ["git", "clone", "my-repo"], - id="git-Linux-X64", - ), - pytest.param( - build.Platform(build.OperatingSystem.LINUX, build.Architecture.ARM64), - ["git", "clone", "my-repo"], - ["git", "clone", "my-repo"], - id="git-Linux-Arm64", - ), - pytest.param( - build.Platform(build.OperatingSystem.DARWIN, build.Architecture.X64), - ["git", "clone", "my-repo"], - ["git", "clone", "my-repo"], - id="git-Darwin-X64", - ), - pytest.param( - build.Platform(build.OperatingSystem.DARWIN, build.Architecture.ARM64), - ["git", "clone", "my-repo"], - [ - "git", - "clone", - "--config", - "core.autocrlf=false", - "--config", - "core.eol=lf", - "my-repo", - ], - id="git-Darwin-Arm64", - ), - # Abs path - pytest.param( - build.Platform(build.OperatingSystem.LINUX, build.Architecture.X64), - ["/path/to/git", "clone", "my-repo"], - ["/path/to/git", "clone", "my-repo"], - id="Abs-Linux-X64", - ), - pytest.param( - build.Platform(build.OperatingSystem.LINUX, build.Architecture.ARM64), - ["/path/to/git", "clone", "my-repo"], - ["/path/to/git", "clone", "my-repo"], - id="Abs-Linux-Arm64", - ), - pytest.param( - build.Platform(build.OperatingSystem.DARWIN, build.Architecture.X64), - ["/path/to/git", "clone", "my-repo"], - ["/path/to/git", 
"clone", "my-repo"], - id="Abs-Darwin-X64", - ), - pytest.param( - build.Platform(build.OperatingSystem.DARWIN, build.Architecture.ARM64), - ["/path/to/git", "clone", "my-repo"], - [ - "/path/to/git", - "clone", - "--config", - "core.autocrlf=false", - "--config", - "core.eol=lf", - "my-repo", - ], - id="Abs-Darwin-Arm64", - ), - ], -) -def test_git_commands_are_configered_correctly_for_platforms(plat, cmd, expected_cmd): - assert build.config_git_command(plat, cmd) == expected_cmd - - -def test_modify_source_files(p_test_dir): - def make_text_blurb(food): - return textwrap.dedent(f"""\ - My favorite food is {food} - {food} is an important part of a healthy breakfast - {food} {food} {food} {food} - This line should be unchanged! - --> {food} <-- - """) - - original_word = "SPAM" - mutated_word = "EGGS" - - source_files = [] - for i in range(3): - source_file = p_test_dir / f"test_{i}" - source_file.touch() - source_file.write_text(make_text_blurb(original_word)) - source_files.append(source_file) - # Modify a single file - build._modify_source_files(source_files[0], original_word, mutated_word) - assert source_files[0].read_text() == make_text_blurb(mutated_word) - assert source_files[1].read_text() == make_text_blurb(original_word) - assert source_files[2].read_text() == make_text_blurb(original_word) - - # Modify multiple files - build._modify_source_files( - (source_files[1], source_files[2]), original_word, mutated_word - ) - assert source_files[1].read_text() == make_text_blurb(mutated_word) - assert source_files[2].read_text() == make_text_blurb(mutated_word) diff --git a/tests/install/test_mlpackage.py b/tests/install/test_mlpackage.py new file mode 100644 index 000000000..d27e69b2b --- /dev/null +++ b/tests/install/test_mlpackage.py @@ -0,0 +1,122 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import pathlib +from unittest.mock import MagicMock + +import pytest + +from smartsim._core._install.mlpackages import ( + MLPackage, + MLPackageCollection, + RAIPatch, + load_platform_configs, +) +from smartsim._core._install.platform import Platform + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +mock_platform = MagicMock(spec=Platform) + + +@pytest.fixture +def mock_ml_packages(): + foo = MagicMock(spec=MLPackage) + foo.name = "foo" + bar = MagicMock(spec=MLPackage) + bar.name = "bar" + yield [foo, bar] + + +@pytest.mark.parametrize( + "patch", + [MagicMock(spec=RAIPatch), [MagicMock(spec=RAIPatch) for i in range(3)], ()], + ids=["one patch", "multiple patches", "no patch"], +) +def test_mlpackage_constructor(patch): + MLPackage( + "foo", + "0.0.0", + "https://nothing.com", + ["bar==0.1", "baz==0.2"], + pathlib.Path("/nothing/fake"), + patch, + ) + + +def test_mlpackage_collection_constructor(mock_ml_packages): + MLPackageCollection(mock_platform, mock_ml_packages) + + +def test_mlpackage_collection_mutable_mapping_methods(mock_ml_packages): + ml_packages = MLPackageCollection(mock_platform, mock_ml_packages) + for val in ml_packages._ml_packages.values(): + val.version = "0.0.0" + assert ml_packages._ml_packages == ml_packages + + # Test iter + package_names = [pkg.name for pkg in mock_ml_packages] + assert [name for name in ml_packages] == package_names + + # Test get item + for pkg in mock_ml_packages: + assert ml_packages[pkg.name] is pkg + + # Test len + assert len(ml_packages) == len(mock_ml_packages) + + # Test delitem + key = next(iter(mock_ml_packages)).name + del ml_packages[key] + with pytest.raises(KeyError): + ml_packages[key] + assert len(ml_packages) == (len(mock_ml_packages) - 1) + + # Test setitem + with pytest.raises(TypeError): + ml_packages["baz"] = MagicMock(spec=MLPackage) + + # Test contains + name, package = next(iter(ml_packages.items())) + assert name in ml_packages + + # Test 
str + assert "Package" in str(ml_packages) + assert "Version" in str(ml_packages) + assert package.version in str(ml_packages) + assert name in str(ml_packages) + + +def test_load_configs_raises_when_dir_dne(test_dir): + dne_dir = pathlib.Path(test_dir, "dne") + dir_str = os.fspath(dne_dir) + with pytest.raises( + FileNotFoundError, + match=f"Platform configuration directory `{dir_str}` does not exist", + ): + load_platform_configs(dne_dir) diff --git a/tests/install/test_package_retriever.py b/tests/install/test_package_retriever.py new file mode 100644 index 000000000..d415ae235 --- /dev/null +++ b/tests/install/test_package_retriever.py @@ -0,0 +1,106 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import contextlib +import filecmp +import os +import pathlib +import random +import string +import tarfile +import zipfile + +import pytest + +from smartsim._core._install.utils import retrieve + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +@contextlib.contextmanager +def temp_cd(path): + original = os.getcwd() + os.chdir(path) + try: + yield + finally: + os.chdir(original) + + +def make_test_file(test_file): + data = "".join(random.choices(string.ascii_letters + string.digits, k=1024)) + with open(test_file, "w") as f: + f.write(data) + + +def test_local_archive_zip(test_dir): + with temp_cd(test_dir): + test_file = "./test.data" + make_test_file(test_file) + + zip_file = "./test.zip" + with zipfile.ZipFile(zip_file, "w") as f: + f.write(test_file) + + retrieve(zip_file, pathlib.Path("./output")) + + assert filecmp.cmp( + test_file, pathlib.Path("./output") / "test.data", shallow=False + ) + + +def test_local_archive_tgz(test_dir): + with temp_cd(test_dir): + test_file = "./test.data" + make_test_file(test_file) + + tgz_file = "./test.tgz" + with tarfile.open(tgz_file, "w:gz") as f: + f.add(test_file) + + retrieve(tgz_file, pathlib.Path("./output")) + + assert filecmp.cmp( + test_file, pathlib.Path("./output") / "test.data", shallow=False + ) + + +def test_git(test_dir): + retrieve( + "https://github.com/CrayLabs/SmartSim.git", + f"{test_dir}/smartsim_git", + branch="master", + ) + assert 
pathlib.Path(f"{test_dir}/smartsim_git").is_dir() + + +def test_https(test_dir): + output_dir = pathlib.Path(test_dir) / "output" + retrieve( + "https://github.com/CrayLabs/SmartSim/archive/refs/tags/v0.5.0.zip", output_dir + ) + assert output_dir.exists() diff --git a/tests/install/test_platform.py b/tests/install/test_platform.py new file mode 100644 index 000000000..76ff3f76b --- /dev/null +++ b/tests/install/test_platform.py @@ -0,0 +1,89 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import json +import os +import platform + +import pytest + +from smartsim._core._install.platform import Architecture, Device, OperatingSystem + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +def test_device_cpu(): + cpu_enum = Device.CPU + assert not cpu_enum.is_gpu() + assert not cpu_enum.is_cuda() + assert not cpu_enum.is_rocm() + + +@pytest.mark.parametrize("cuda_device", Device.cuda_enums()) +def test_cuda(monkeypatch, test_dir, cuda_device): + version = cuda_device.value.split("-")[1] + fake_full_version = version + ".8888" ".9999" + monkeypatch.setenv("CUDA_HOME", test_dir) + + mock_version = dict(cuda=dict(version=fake_full_version)) + print(mock_version) + with open(f"{test_dir}/version.json", "w") as outfile: + json.dump(mock_version, outfile) + + assert Device.detect_cuda_version() == cuda_device + assert cuda_device.is_gpu() + assert cuda_device.is_cuda() + assert not cuda_device.is_rocm() + + +@pytest.mark.parametrize("rocm_device", Device.rocm_enums()) +def test_rocm(monkeypatch, test_dir, rocm_device): + version = rocm_device.value.split("-")[1] + fake_full_version = version + ".8888" + "-9999" + monkeypatch.setenv("ROCM_HOME", test_dir) + info_dir = f"{test_dir}/.info" + os.mkdir(info_dir) + + with open(f"{info_dir}/version", "w") as outfile: + outfile.write(fake_full_version) + + assert Device.detect_rocm_version() == rocm_device + assert rocm_device.is_gpu() + assert not rocm_device.is_cuda() + assert rocm_device.is_rocm() + + +@pytest.mark.parametrize("os", ("linux", "darwin")) +def test_operating_system(monkeypatch, os): + monkeypatch.setattr(platform, "system", lambda: os) + assert OperatingSystem.autodetect().value == os + + +@pytest.mark.parametrize("arch", ("x86_64", "arm64")) +def test_architecture(monkeypatch, arch): + monkeypatch.setattr(platform, "machine", lambda: arch) + assert Architecture.autodetect().value == arch diff --git a/tests/install/test_redisai_builder.py 
b/tests/install/test_redisai_builder.py new file mode 100644 index 000000000..81673a7f1 --- /dev/null +++ b/tests/install/test_redisai_builder.py @@ -0,0 +1,60 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from pathlib import Path + +import pytest + +from smartsim._core._install.buildenv import BuildEnv +from smartsim._core._install.mlpackages import ( + DEFAULT_MLPACKAGE_PATH, + MLPackage, + load_platform_configs, +) +from smartsim._core._install.platform import Platform +from smartsim._core._install.redisaiBuilder import RedisAIBuilder + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +DEFAULT_MLPACKAGES = load_platform_configs(DEFAULT_MLPACKAGE_PATH) + + +@pytest.mark.parametrize( + "platform", + [platform for platform in DEFAULT_MLPACKAGES], + ids=[str(platform) for platform in DEFAULT_MLPACKAGES], +) +def test_backends_to_be_installed(monkeypatch, test_dir, platform): + mlpackages = DEFAULT_MLPACKAGES[platform] + monkeypatch.setattr(MLPackage, "retrieve", lambda *args, **kwargs: None) + builder = RedisAIBuilder(platform, mlpackages, BuildEnv(), Path(test_dir)) + + BACKENDS = ["libtorch", "libtensorflow", "onnxruntime"] + TOGGLES = ["build_torch", "build_tensorflow", "build_onnxruntime"] + + for backend, toggle in zip(BACKENDS, TOGGLES): + assert getattr(builder, toggle) == (backend in mlpackages) diff --git a/tests/test_batch_settings.py b/tests/test_batch_settings.py index db269a9b5..c4f365c39 100644 --- a/tests/test_batch_settings.py +++ b/tests/test_batch_settings.py @@ -64,7 +64,7 @@ def test_create_sbatch(): assert isinstance(slurm_batch, SbatchSettings) assert slurm_batch.batch_args["partition"] == "default" args = slurm_batch.format_batch_args() - assert args == [ + expected_args = [ "--exclusive", "--oversubscribe", "--nodes=1", @@ -72,6 +72,8 @@ def test_create_sbatch(): "--partition=default", "--account=myproject", ] + assert all(arg in expected_args for arg in args) + assert len(expected_args) == len(args) def test_create_bsub(): diff --git a/tests/test_cli.py b/tests/test_cli.py index 710a9a659..1cead7625 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -436,24 +436,23 @@ def mock_execute(ns: 
argparse.Namespace, _unparsed: t.Optional[t.List[str]] = No # fmt: off @pytest.mark.parametrize( - "command,mock_location,exp_output,optional_arg,exp_valid,exp_err_msg,check_prop,exp_prop_val", + "command, mock_location, exp_output, optional_arg, exp_valid, exp_err_msg, check_prop, exp_prop_val", [ - pytest.param("build", "build_execute", "verbose mocked-build", "-v", True, "", "v", True, id="verbose 'on'"), - pytest.param("build", "build_execute", "cpu mocked-build", "--device=cpu", True, "", "device", "cpu", id="device 'cpu'"), - pytest.param("build", "build_execute", "gpu mocked-build", "--device=gpu", True, "", "device", "gpu", id="device 'gpu'"), - pytest.param("build", "build_execute", "gpuX mocked-build", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="set bad device 'gpuX'"), - pytest.param("build", "build_execute", "no tensorflow mocked-build", "--no_tf", True, "", "no_tf", True, id="set no TF"), - pytest.param("build", "build_execute", "no torch mocked-build", "--no_pt", True, "", "no_pt", True, id="set no torch"), - pytest.param("build", "build_execute", "onnx mocked-build", "--onnx", True, "", "onnx", True, id="set w/onnx"), - pytest.param("build", "build_execute", "torch-dir mocked-build", "--torch_dir /foo/bar", True, "", "torch_dir", "/foo/bar", id="set torch dir"), - pytest.param("build", "build_execute", "bad-torch-dir mocked-build", "--torch_dir", False, "error: argument --torch_dir", "", "", id="set torch dir, no path"), - pytest.param("build", "build_execute", "keydb mocked-build", "--keydb", True, "", "keydb", True, id="keydb on"), - pytest.param("clean", "clean_execute", "clobbering mocked-clean", "--clobber", True, "", "clobber", True, id="clean w/clobber"), - pytest.param("validate", "validate_execute", "port mocked-validate", "--port=12345", True, "", "port", 12345, id="validate w/ manual port"), - pytest.param("validate", "validate_execute", "abbrv port mocked-validate", "-p 12345", True, "", "port", 12345, id="validate w/ 
manual abbreviated port"), - pytest.param("validate", "validate_execute", "cpu mocked-validate", "--device=cpu", True, "", "device", "cpu", id="validate: device 'cpu'"), - pytest.param("validate", "validate_execute", "gpu mocked-validate", "--device=gpu", True, "", "device", "gpu", id="validate: device 'gpu'"), - pytest.param("validate", "validate_execute", "gpuX mocked-validate", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="validate: set bad device 'gpuX'"), + pytest.param( "build", "build_execute", "verbose mocked-build", "-v", True, "", "v", True, id="verbose 'on'"), + pytest.param( "build", "build_execute", "cpu mocked-build", "--device=cpu", True, "", "device", "cpu", id="device 'cpu'"), + pytest.param( "build", "build_execute", "gpuX mocked-build", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="set bad device 'gpuX'"), + pytest.param( "build", "build_execute", "no tensorflow mocked-build", "--skip-tensorflow", True, "", "no_tf", True, id="Skip TF"), + pytest.param( "build", "build_execute", "no torch mocked-build", "--skip-torch", True, "", "no_pt", True, id="Skip Torch"), + pytest.param( "build", "build_execute", "onnx mocked-build", "--skip-onnx", True, "", "onnx", True, id="Skip Onnx"), + pytest.param( "build", "build_execute", "config-dir mocked-build", "--config-dir /foo/bar", True, "", "config-dir", "/foo/bar", id="set torch dir"), + pytest.param( "build", "build_execute", "bad-config-dir mocked-build", "--config-dir", False, "error: argument --config-dir", "", "", id="set config dir w/o path"), + pytest.param( "build", "build_execute", "keydb mocked-build", "--keydb", True, "", "keydb", True, id="keydb on"), + pytest.param( "clean", "clean_execute", "clobbering mocked-clean", "--clobber", True, "", "clobber", True, id="clean w/clobber"), + pytest.param("validate", "validate_execute", "port mocked-validate", "--port=12345", True, "", "port", 12345, id="validate w/ manual port"), + pytest.param("validate", 
"validate_execute", "abbrv port mocked-validate", "-p 12345", True, "", "port", 12345, id="validate w/ manual abbreviated port"), + pytest.param("validate", "validate_execute", "cpu mocked-validate", "--device=cpu", True, "", "device", "cpu", id="validate: device 'cpu'"), + pytest.param("validate", "validate_execute", "gpu mocked-validate", "--device=gpu", True, "", "device", "gpu", id="validate: device 'gpu'"), + pytest.param("validate", "validate_execute", "gpuX mocked-validate", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="validate: set bad device 'gpuX'"), ] ) # fmt: on @@ -735,15 +734,6 @@ def mock_operation(*args, **kwargs) -> int: monkeypatch.setattr(smartsim._core._cli.build, "tabulate", mock_operation) monkeypatch.setattr(smartsim._core._cli.build, "build_database", mock_operation) monkeypatch.setattr(smartsim._core._cli.build, "build_redis_ai", mock_operation) - monkeypatch.setattr( - smartsim._core._cli.build, "check_py_torch_version", mock_operation - ) - monkeypatch.setattr( - smartsim._core._cli.build, "check_py_tf_version", mock_operation - ) - monkeypatch.setattr( - smartsim._core._cli.build, "check_py_onnx_version", mock_operation - ) command = "build" cfg = MenuItemConfig( diff --git a/tests/test_dragon_client.py b/tests/test_dragon_client.py new file mode 100644 index 000000000..80257b610 --- /dev/null +++ b/tests/test_dragon_client.py @@ -0,0 +1,192 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os +import pathlib +import typing as t +from unittest.mock import MagicMock + +import pytest + +from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim.settings import DragonRunSettings +from smartsim.settings.slurmSettings import SbatchSettings + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +import smartsim._core.entrypoints.dragon_client as dragon_client +from smartsim._core.schemas.dragonRequests import * +from smartsim._core.schemas.dragonResponses import * + + +@pytest.fixture +def dragon_batch_step(test_dir: str) -> "DragonBatchStep": + """Fixture for creating a default batch of steps for a dragon launcher""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / 
".smartsim" / "logs").as_posix() + batch_step.meta["status_dir"] = status_dir + + # create some steps to verify the requests file output changes + rs0 = DragonRunSettings(exe="sleep", exe_args=["1"]) + rs1 = DragonRunSettings(exe="sleep", exe_args=["2"]) + rs2 = DragonRunSettings(exe="sleep", exe_args=["3"]) + rs3 = DragonRunSettings(exe="sleep", exe_args=["4"]) + + names = "test00", "test01", "test02", "test03" + settings = rs0, rs1, rs2, rs3 + + # create steps with: + # no affinity, cpu affinity only, gpu affinity only, cpu and gpu affinity + cpu_affinities = [[], [0, 1, 2], [], [3, 4, 5, 6]] + gpu_affinities = [[], [], [0, 1, 2], [3, 4, 5, 6]] + + # assign some unique affinities to each run setting instance + for index, rs in enumerate(settings): + if gpu_affinities[index]: + rs.set_node_feature("gpu") + rs.set_cpu_affinity(cpu_affinities[index]) + rs.set_gpu_affinity(gpu_affinities[index]) + + steps = list( + DragonStep(name_, test_dir, rs_) for name_, rs_ in zip(names, settings) + ) + + for index, step in enumerate(steps): + # ensure meta is configured... + step.meta["status_dir"] = status_dir + # ... 
and put all the steps into the batch + batch_step.add_to_batch(steps[index]) + + return batch_step + + +def get_request_path_from_batch_script(launch_cmd: t.List[str]) -> pathlib.Path: + """Helper method for finding the path to a request file from the launch command""" + script_path = pathlib.Path(launch_cmd[-1]) + batch_script = script_path.read_text(encoding="utf-8") + batch_statements = [line for line in batch_script.split("\n") if line] + entrypoint_cmd = batch_statements[-1] + requests_file = pathlib.Path(entrypoint_cmd.split()[-1]) + return requests_file + + +def test_dragon_client_main_no_arg(monkeypatch: pytest.MonkeyPatch): + """Verify the client fails when the path to a submission file is not provided.""" + with pytest.raises(SystemExit): + dragon_client.cleanup = MagicMock() + dragon_client.main([]) + + # arg parser failures occur before resource allocation and should + # not result in resource cleanup being called + assert not dragon_client.cleanup.called + + +def test_dragon_client_main_empty_arg(test_dir: str): + """Verify the client fails when the path to a submission file is empty.""" + + with pytest.raises(ValueError) as ex: + dragon_client.cleanup = MagicMock() + dragon_client.main(["+submit", ""]) + + # verify it's a value error related to submit argument + assert "file not provided" in ex.value.args[0] + + # arg parser failures occur before resource allocation and should + # not result in resource cleanup being called + assert not dragon_client.cleanup.called + + +def test_dragon_client_main_bad_arg(test_dir: str): + """Verify the client returns a failure code when the path to a submission file is + invalid and does not raise an exception""" + path = pathlib.Path(test_dir) / "nonexistent_file.json" + + dragon_client.cleanup = MagicMock() + return_code = dragon_client.main(["+submit", str(path)]) + + # ensure non-zero return code + assert return_code != 0 + + # ensure failures do not block resource cleanup + assert dragon_client.cleanup.called + 
+ +def test_dragon_client_main( + dragon_batch_step: DragonBatchStep, monkeypatch: pytest.MonkeyPatch +): + """Verify the client sends every request in a valid submission file to the + connector and still runs resource cleanup""" + launch_cmd = dragon_batch_step.get_launch_cmd() + path = get_request_path_from_batch_script(launch_cmd) + num_requests_in_batch = 4 + num_shutdown_requests = 1 + request_count = num_requests_in_batch + num_shutdown_requests + submit_value = str(path) + + mock_connector = MagicMock() # DragonConnector + mock_connector.is_connected = True + mock_connector.send_request.return_value = DragonRunResponse(step_id="mock_step_id") + # mock can_monitor to exit before the infinite loop checking for shutdown + mock_connector.can_monitor = False + + mock_connector_class = MagicMock() + mock_connector_class.return_value = mock_connector + + # with monkeypatch.context() as ctx: + dragon_client.DragonConnector = mock_connector_class + dragon_client.cleanup = MagicMock() + + return_code = dragon_client.main(["+submit", submit_value]) + + # verify each request in the request file was processed + assert mock_connector.send_request.call_count == request_count + + # we know the batch fixture has a step with no affinity args supplied. skip it + for i in range(1, num_requests_in_batch): + sent_args = mock_connector.send_request.call_args_list[i][0] + request_arg = sent_args[0] + + assert isinstance(request_arg, DragonRunRequest) + + policy = request_arg.policy + + # make sure each policy has been read in correctly with valid affinity indices + assert len(policy.cpu_affinity) == len(set(policy.cpu_affinity)) + assert len(policy.gpu_affinity) == len(set(policy.gpu_affinity)) + + # we get a non-zero due to avoiding the infinite loop.
consider refactoring + assert return_code == os.EX_IOERR + + # ensure failures do not block resource cleanup + assert dragon_client.cleanup.called diff --git a/tests/test_dragon_launcher.py b/tests/test_dragon_launcher.py index ee0fcb14b..4bd07e920 100644 --- a/tests/test_dragon_launcher.py +++ b/tests/test_dragon_launcher.py @@ -31,6 +31,7 @@ import sys import time import typing as t +from unittest.mock import MagicMock import pytest import zmq @@ -38,15 +39,74 @@ import smartsim._core.config from smartsim._core._cli.scripts.dragon_install import create_dotenv from smartsim._core.config.config import get_config -from smartsim._core.launcher.dragon.dragonLauncher import DragonConnector +from smartsim._core.launcher.dragon.dragonLauncher import ( + DragonConnector, + DragonLauncher, +) from smartsim._core.launcher.dragon.dragonSockets import ( get_authenticator, get_secure_socket, ) +from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep from smartsim._core.schemas.dragonRequests import DragonBootstrapRequest -from smartsim._core.schemas.dragonResponses import DragonHandshakeResponse +from smartsim._core.schemas.dragonResponses import ( + DragonHandshakeResponse, + DragonRunResponse, +) from smartsim._core.utils.network import IFConfig, find_free_port from smartsim._core.utils.security import KeyManager +from smartsim.error.errors import LauncherError +from smartsim.settings.dragonRunSettings import DragonRunSettings +from smartsim.settings.slurmSettings import SbatchSettings + + +@pytest.fixture +def dragon_batch_step(test_dir: str) -> DragonBatchStep: + """Fixture for creating a default batch of steps for a dragon launcher""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / ".smartsim" / "logs").as_posix() + 
batch_step.meta["status_dir"] = status_dir + + # create some steps to verify the requests file output changes + rs0 = DragonRunSettings(exe="sleep", exe_args=["1"]) + rs1 = DragonRunSettings(exe="sleep", exe_args=["2"]) + rs2 = DragonRunSettings(exe="sleep", exe_args=["3"]) + rs3 = DragonRunSettings(exe="sleep", exe_args=["4"]) + + names = "test00", "test01", "test02", "test03" + settings = rs0, rs1, rs2, rs3 + + # create steps with: + # no affinity, cpu affinity only, gpu affinity only, cpu and gpu affinity + cpu_affinities = [[], [0, 1, 2], [], [3, 4, 5, 6]] + gpu_affinities = [[], [], [0, 1, 2], [3, 4, 5, 6]] + + # assign some unique affinities to each run setting instance + for index, rs in enumerate(settings): + if gpu_affinities[index]: + rs.set_node_feature("gpu") + rs.set_cpu_affinity(cpu_affinities[index]) + rs.set_gpu_affinity(gpu_affinities[index]) + + steps = list( + DragonStep(name_, test_dir, rs_) for name_, rs_ in zip(names, settings) + ) + + for index, step in enumerate(steps): + # ensure meta is configured... + step.meta["status_dir"] = status_dir + # ... 
and put all the steps into the batch + batch_step.add_to_batch(steps[index]) + + return batch_step + # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -521,3 +581,168 @@ def test_merge_env(monkeypatch: pytest.MonkeyPatch, test_dir: str): # any non-dragon keys that didn't exist avoid unnecessary prepending assert merged_env[non_dragon_key] == non_dragon_value + + +def test_run_step_fail(test_dir: str) -> None: + """Verify that the dragon launcher still returns the step id + when the running step fails""" + test_path = pathlib.Path(test_dir) + status_dir = (test_path / ".smartsim" / "logs").as_posix() + + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + step0 = DragonStep("step0", test_dir, rs) + step0.meta["status_dir"] = status_dir + + mock_connector = MagicMock(spec=DragonConnector) + mock_connector.is_connected = True + mock_connector.send_request = MagicMock( + return_value=DragonRunResponse(step_id=step0.name, error_message="mock fail!") + ) + mock_connector.merge_persisted_env = MagicMock( + return_value={"FOO": "bar", "BAZ": "boop"} + ) + + launcher = DragonLauncher() + launcher._connector = mock_connector + + result = launcher.run(step0) + + # verify the failed step name is in the result + assert step0.name in result + + +def test_run_step_batch_empty(dragon_batch_step: DragonBatchStep) -> None: + """Verify that the dragon launcher behaves when asked to execute + a batch step that has no sub-steps""" + # remove the steps added in the batch fixture + dragon_batch_step.steps.clear() + + mock_step_id = "MOCK-STEPID" + mock_connector = MagicMock() # DragonConnector() + mock_connector.is_connected = True + mock_connector.send_request = MagicMock( + return_value=DragonRunResponse( + step_id=dragon_batch_step.name, error_message="mock fail!" 
+ ) + ) + + launcher = DragonLauncher() + launcher._connector = mock_connector + launcher.task_manager.start_and_wait = MagicMock(return_value=(0, mock_step_id, "")) + + result = launcher.run(dragon_batch_step) + + # verify a step name is returned + assert result + # verify the batch step name is not in the result (renamed to SLURM-*) + assert dragon_batch_step.name not in result + + send_invocation = mock_connector.send_request + + # verify a batch request is not sent through the dragon connector + send_invocation.assert_not_called() + + +def test_run_step_batch_failure(dragon_batch_step: DragonBatchStep) -> None: + """Verify that the dragon launcher raises a LauncherError + when the batch script fails""" + mock_connector = MagicMock() # DragonConnector() + mock_connector.is_connected = True + mock_connector.send_request = MagicMock( + return_value=DragonRunResponse( + step_id=dragon_batch_step.name, error_message="mock fail!" + ) + ) + + mock_step_id = "MOCK-STEPID" + error_msg = "DOES_NOT_COMPUTE!"
+ launcher = DragonLauncher() + launcher._connector = mock_connector + launcher.task_manager.start_and_wait = MagicMock( + return_value=(1, mock_step_id, error_msg) + ) + + # a non-zero return code from the batch script should raise an error + with pytest.raises(LauncherError) as ex: + launcher.run(dragon_batch_step) + + # verify the correct error message is in the exception + assert error_msg in ex.value.args[0] + + +def test_run_step_success(test_dir: str) -> None: + """Verify that the dragon launcher sends the correctly formatted request for a step""" + test_path = pathlib.Path(test_dir) + status_dir = (test_path / ".smartsim" / "logs").as_posix() + + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + step0 = DragonStep("step0", test_dir, rs) + step0.meta["status_dir"] = status_dir + + mock_connector = MagicMock(spec=DragonConnector) + mock_connector.is_connected = True + mock_connector.send_request = MagicMock( + return_value=DragonRunResponse(step_id=step0.name) + ) + + launcher = DragonLauncher() + launcher._connector = mock_connector + mock_connector.merge_persisted_env = MagicMock( + return_value={"FOO": "bar", "BAZ": "boop"} + ) + + result = launcher.run(step0) + + # verify the successfully executed step name is in the result + assert step0.name in result + + # verify the DragonRunRequest sent matches all expectations + send_invocation = mock_connector.send_request + send_invocation.assert_called_once() + + args = send_invocation.call_args[0] # call_args == t.Tuple[args, kwargs] + + dragon_run_request = args[0] + req_name = dragon_run_request.name # name sent to dragon env + assert req_name.startswith(step0.name) + + req_policy_cpu_affinity = dragon_run_request.policy.cpu_affinity + assert not req_policy_cpu_affinity # default should be empty list + + req_policy_gpu_affinity = dragon_run_request.policy.gpu_affinity + assert not req_policy_gpu_affinity # default should be empty list + + +def test_run_step_success_batch( + monkeypatch: pytest.MonkeyPatch, 
dragon_batch_step: DragonBatchStep +) -> None: + """Verify that the dragon launcher executes the batch script for a batch step + instead of sending a request through the dragon connector""" + mock_connector = MagicMock() # DragonConnector() + mock_connector.is_connected = True + mock_connector.send_request = MagicMock( + return_value=DragonRunResponse(step_id=dragon_batch_step.name) + ) + + launcher = DragonLauncher() + launcher._connector = mock_connector + launcher.task_manager.start_and_wait = MagicMock(return_value=(0, "success", "")) + + result = launcher.run(dragon_batch_step) + + # verify a result is returned and the batch step name is not in it + assert dragon_batch_step.name not in result + assert result + + send_invocation = mock_connector.send_request + + # verify a batch request is not sent through the dragon connector + send_invocation.assert_not_called() + launcher.task_manager.start_and_wait.assert_called_once() + + args = launcher.task_manager.start_and_wait.call_args[0] + + # verify the batch script is executed + launch_cmd = dragon_batch_step.get_launch_cmd() + for stmt in launch_cmd: + assert stmt in args[0] # args[0] is the cmd list sent to subprocess.Popen diff --git a/tests/test_dragon_run_policy.py b/tests/test_dragon_run_policy.py new file mode 100644 index 000000000..1d8d069fa --- /dev/null +++ b/tests/test_dragon_run_policy.py @@ -0,0 +1,371 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pathlib + +import pytest + +from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim.settings.dragonRunSettings import DragonRunSettings +from smartsim.settings.slurmSettings import SbatchSettings + +try: + from dragon.infrastructure.policy import Policy + + import smartsim._core.entrypoints.dragon as drg + from smartsim._core.launcher.dragon.dragonBackend import DragonBackend + + dragon_loaded = True +except: + dragon_loaded = False + +# The tests in this file belong to the group_b group +pytestmark = pytest.mark.group_b + +from smartsim._core.schemas.dragonRequests import * +from smartsim._core.schemas.dragonResponses import * + + +@pytest.fixture +def dragon_batch_step(test_dir: str) -> "DragonBatchStep": + """Fixture for creating a default batch of steps for a dragon launcher""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / ".smartsim" / "logs").as_posix() + batch_step.meta["status_dir"] = status_dir + + # create 
some steps to verify the requests file output changes + rs0 = DragonRunSettings(exe="sleep", exe_args=["1"]) + rs1 = DragonRunSettings(exe="sleep", exe_args=["2"]) + rs2 = DragonRunSettings(exe="sleep", exe_args=["3"]) + rs3 = DragonRunSettings(exe="sleep", exe_args=["4"]) + + names = "test00", "test01", "test02", "test03" + settings = rs0, rs1, rs2, rs3 + + # create steps with: + # no affinity, cpu affinity only, gpu affinity only, cpu and gpu affinity + cpu_affinities = [[], [0, 1, 2], [], [3, 4, 5, 6]] + gpu_affinities = [[], [], [0, 1, 2], [3, 4, 5, 6]] + + # assign some unique affinities to each run setting instance + for index, rs in enumerate(settings): + if gpu_affinities[index]: + rs.set_node_feature("gpu") + rs.set_cpu_affinity(cpu_affinities[index]) + rs.set_gpu_affinity(gpu_affinities[index]) + + steps = list( + DragonStep(name_, test_dir, rs_) for name_, rs_ in zip(names, settings) + ) + + for index, step in enumerate(steps): + # ensure meta is configured... + step.meta["status_dir"] = status_dir + # ... and put all the steps into the batch + batch_step.add_to_batch(steps[index]) + + return batch_step + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +@pytest.mark.parametrize( + "dragon_request", + [ + pytest.param(DragonHandshakeRequest(), id="DragonHandshakeRequest"), + pytest.param(DragonShutdownRequest(), id="DragonShutdownRequest"), + pytest.param( + DragonBootstrapRequest(address="localhost"), id="DragonBootstrapRequest" + ), + ], +) +def test_create_run_policy_non_run_request(dragon_request: DragonRequest) -> None: + """Verify that a default policy is returned when a request is + not attempting to start a new process (e.g.
a DragonRunRequest)""" + policy = DragonBackend.create_run_policy(dragon_request, "localhost") + + assert policy is not None, "Default policy was not returned" + assert ( + policy.device == Policy.Device.DEFAULT + ), "Default device was not Device.DEFAULT" + assert policy.cpu_affinity == [], "Default cpu affinity was not empty" + assert policy.gpu_affinity == [], "Default gpu affinity was not empty" + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_create_run_policy_run_request_no_run_policy() -> None: + """Verify that a run request specifying no policy yields a policy with all default + values (no device, empty cpu & gpu affinity)""" + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + # policy= # <--- skipping this + ) + + policy = DragonBackend.create_run_policy(run_req, "localhost") + + assert policy.device == Policy.Device.DEFAULT + assert set(policy.cpu_affinity) == set() + assert policy.gpu_affinity == [] + assert policy.affinity == Policy.Affinity.DEFAULT + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_create_run_policy_run_request_default_run_policy() -> None: + """Verify that a policy specifying no affinity is returned with + default value for device and empty affinity lists""" + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(), # <--- passing default values + ) + + policy = DragonBackend.create_run_policy(run_req, "localhost") + + assert set(policy.cpu_affinity) == set() + assert set(policy.gpu_affinity) == set() + assert policy.affinity == Policy.Affinity.DEFAULT + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def
test_create_run_policy_run_request_cpu_affinity_no_device() -> None: + """Verify that an input policy specifying a CPU affinity but lacking the device field + produces a Dragon Policy with the CPU device specified""" + affinity = set([0, 2, 4]) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(cpu_affinity=list(affinity)), # <-- no device spec + ) + + policy = DragonBackend.create_run_policy(run_req, "localhost") + + assert set(policy.cpu_affinity) == affinity + assert policy.gpu_affinity == [] + assert policy.affinity == Policy.Affinity.SPECIFIC + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_create_run_policy_run_request_cpu_affinity() -> None: + """Verify that a policy specifying CPU affinity is returned as expected""" + affinity = set([0, 2, 4]) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(cpu_affinity=list(affinity)), + ) + + policy = DragonBackend.create_run_policy(run_req, "localhost") + + assert set(policy.cpu_affinity) == affinity + assert policy.gpu_affinity == [] + assert policy.affinity == Policy.Affinity.SPECIFIC + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_create_run_policy_run_request_gpu_affinity() -> None: + """Verify that a policy specifying GPU affinity is returned as expected""" + affinity = set([0, 2, 4]) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(device="gpu", gpu_affinity=list(affinity)), + ) + + policy = DragonBackend.create_run_policy(run_req, "localhost") + + assert
policy.cpu_affinity == [] + assert set(policy.gpu_affinity) == set(affinity) + assert policy.affinity == Policy.Affinity.SPECIFIC + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_dragon_run_policy_from_run_args() -> None: + """Verify that a DragonRunPolicy is created from a dictionary of run arguments""" + run_args = { + "gpu-affinity": "0,1,2", + "cpu-affinity": "3,4,5,6", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [3, 4, 5, 6] + assert policy.gpu_affinity == [0, 1, 2] + + +def test_dragon_run_policy_from_run_args_empty() -> None: + """Verify that a DragonRunPolicy is created from an empty + dictionary of run arguments""" + run_args = {} + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [] + assert policy.gpu_affinity == [] + + +def test_dragon_run_policy_from_run_args_cpu_affinity() -> None: + """Verify that a DragonRunPolicy is created from a dictionary + of run arguments containing a CPU affinity""" + run_args = { + "cpu-affinity": "3,4,5,6", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [3, 4, 5, 6] + assert policy.gpu_affinity == [] + + +def test_dragon_run_policy_from_run_args_gpu_affinity() -> None: + """Verify that a DragonRunPolicy is created from a dictionary + of run arguments containing a GPU affinity""" + run_args = { + "gpu-affinity": "0, 1, 2", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [] + assert policy.gpu_affinity == [0, 1, 2] + + +def test_dragon_run_policy_from_run_args_invalid_gpu_affinity() -> None: + """Verify that a DragonRunPolicy is NOT created from a dictionary + of run arguments with an invalid GPU affinity""" + run_args = { + "gpu-affinity": "0,-1,2", + } + + with pytest.raises(SmartSimError) as ex: + DragonRunPolicy.from_run_args(run_args) + + assert "DragonRunPolicy" in ex.value.args[0] + + +def 
test_dragon_run_policy_from_run_args_invalid_cpu_affinity() -> None: + """Verify that a DragonRunPolicy is NOT created from a dictionary + of run arguments with an invalid CPU affinity""" + run_args = { + "cpu-affinity": "3,4,5,-6", + } + + with pytest.raises(SmartSimError) as ex: + DragonRunPolicy.from_run_args(run_args) + + assert "DragonRunPolicy" in ex.value.args[0] + + +def test_dragon_run_policy_from_run_args_ignore_empties_gpu() -> None: + """Verify that a DragonRunPolicy is created from a dictionary + of run arguments and ignores empty values in the serialized gpu list""" + run_args = { + "gpu-affinity": "0,,2", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [] + assert policy.gpu_affinity == [0, 2] + + +def test_dragon_run_policy_from_run_args_ignore_empties_cpu() -> None: + """Verify that a DragonRunPolicy is created from a dictionary + of run arguments and ignores empty values in the serialized cpu list""" + run_args = { + "cpu-affinity": "3,4,,6,", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [3, 4, 6] + assert policy.gpu_affinity == [] + + +def test_dragon_run_policy_from_run_args_null_gpu_affinity() -> None: + """Verify that a DragonRunPolicy is created if a null value is encountered + in the gpu-affinity list""" + run_args = { + "gpu-affinity": None, + "cpu-affinity": "3,4,5,6", + } + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [3, 4, 5, 6] + assert policy.gpu_affinity == [] + + +def test_dragon_run_policy_from_run_args_null_cpu_affinity() -> None: + """Verify that a DragonRunPolicy is created if a null value is encountered + in the cpu-affinity list""" + run_args = {"gpu-affinity": "0,1,2", "cpu-affinity": None} + + policy = DragonRunPolicy.from_run_args(run_args) + + assert policy.cpu_affinity == [] + assert policy.gpu_affinity == [0, 1, 2] diff --git a/tests/test_dragon_backend.py b/tests/test_dragon_run_request.py 
similarity index 64% rename from tests/test_dragon_backend.py rename to tests/test_dragon_run_request.py index a510f660a..7514deab1 100644 --- a/tests/test_dragon_backend.py +++ b/tests/test_dragon_run_request.py @@ -31,19 +31,17 @@ from unittest.mock import MagicMock import pytest +from pydantic import ValidationError # The tests in this file belong to the group_b group -pytestmark = pytest.mark.group_a +pytestmark = pytest.mark.group_b try: import dragon -except ImportError: - pass -else: - pytest.skip( - reason="Using dragon as launcher, not running Dragon unit tests", - allow_module_level=True, - ) + + dragon_loaded = True +except: + dragon_loaded = False from smartsim._core.config import CONFIG from smartsim._core.schemas.dragonRequests import * @@ -59,10 +57,36 @@ class NodeMock(MagicMock): + def __init__( + self, name: t.Optional[str] = None, num_gpus: int = 2, num_cpus: int = 8 + ) -> None: + super().__init__() + self._mock_id = name + NodeMock._num_gpus = num_gpus + NodeMock._num_cpus = num_cpus + @property def hostname(self) -> str: + if self._mock_id: + return self._mock_id return create_short_id_str() + @property + def num_cpus(self) -> str: + return NodeMock._num_cpus + + @property + def num_gpus(self) -> str: + return NodeMock._num_gpus + + def _set_id(self, value: str) -> None: + self._mock_id = value + + def gpus(self, parent: t.Any = None) -> t.List[str]: + if self._num_gpus: + return [f"{self.hostname}-gpu{i}" for i in range(NodeMock._num_gpus)] + return [] + class GroupStateMock(MagicMock): def Running(self) -> MagicMock: @@ -78,13 +102,19 @@ class ProcessGroupMock(MagicMock): puids = [121, 122] -def get_mock_backend(monkeypatch: pytest.MonkeyPatch) -> "DragonBackend": +def node_mock() -> NodeMock: + return NodeMock() + + +def get_mock_backend( + monkeypatch: pytest.MonkeyPatch, num_gpus: int = 2 +) -> "DragonBackend": process_mock = MagicMock(returncode=0) process_group_mock = MagicMock(**{"Process.return_value": ProcessGroupMock()}) 
process_module_mock = MagicMock() process_module_mock.Process = process_mock - node_mock = NodeMock() + node_mock = NodeMock(num_gpus=num_gpus) system_mock = MagicMock(nodes=["node1", "node2", "node3"]) monkeypatch.setitem( sys.modules, @@ -189,6 +219,7 @@ def set_mock_group_infos( return group_infos +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_handshake_request(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) @@ -199,6 +230,7 @@ def test_handshake_request(monkeypatch: pytest.MonkeyPatch) -> None: assert handshake_resp.dragon_pid == 99999 +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_run_request(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) run_req = DragonRunRequest( @@ -249,6 +281,7 @@ def test_run_request(monkeypatch: pytest.MonkeyPatch) -> None: assert not dragon_backend._running_steps +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_deny_run_request(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) @@ -274,6 +307,78 @@ def test_deny_run_request(monkeypatch: pytest.MonkeyPatch) -> None: assert dragon_backend.group_infos[step_id].status == SmartSimStatus.STATUS_FAILED +def test_run_request_with_empty_policy(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a policy is applied to a run request""" + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=None, + ) + assert run_req.policy is None + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_run_request_with_policy(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a policy is applied to a run request""" + 
dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(cpu_affinity=[0, 1]), + ) + + run_resp = dragon_backend.process_request(run_req) + assert isinstance(run_resp, DragonRunResponse) + + step_id = run_resp.step_id + assert dragon_backend._queued_steps[step_id] == run_req + + mock_process_group = MagicMock(puids=[123, 124]) + + dragon_backend._group_infos[step_id].process_group = mock_process_group + dragon_backend._group_infos[step_id].puids = [123, 124] + dragon_backend._start_steps() + + assert dragon_backend._running_steps == [step_id] + assert len(dragon_backend._queued_steps) == 0 + assert len(dragon_backend._free_hosts) == 1 + assert dragon_backend._allocated_hosts[dragon_backend.hosts[0]] == step_id + assert dragon_backend._allocated_hosts[dragon_backend.hosts[1]] == step_id + + monkeypatch.setattr( + dragon_backend._group_infos[step_id].process_group, "status", "Running" + ) + + dragon_backend._update() + + assert dragon_backend._running_steps == [step_id] + assert len(dragon_backend._queued_steps) == 0 + assert len(dragon_backend._free_hosts) == 1 + assert dragon_backend._allocated_hosts[dragon_backend.hosts[0]] == step_id + assert dragon_backend._allocated_hosts[dragon_backend.hosts[1]] == step_id + + dragon_backend._group_infos[step_id].status = SmartSimStatus.STATUS_CANCELLED + + dragon_backend._update() + assert not dragon_backend._running_steps + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_udpate_status_request(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) @@ -290,6 +395,7 @@ def test_udpate_status_request(monkeypatch: pytest.MonkeyPatch) -> None: } +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def 
test_stop_request(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) group_infos = set_mock_group_infos(monkeypatch, dragon_backend) @@ -321,6 +427,7 @@ def test_stop_request(monkeypatch: pytest.MonkeyPatch) -> None: assert len(dragon_backend._free_hosts) == 3 +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") @pytest.mark.parametrize( "immediate, kill_jobs, frontend_shutdown", [ @@ -379,6 +486,7 @@ def test_shutdown_request( assert dragon_backend._has_cooled_down == kill_jobs +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") @pytest.mark.parametrize("telemetry_flag", ["0", "1"]) def test_cooldown_is_set(monkeypatch: pytest.MonkeyPatch, telemetry_flag: str) -> None: monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", telemetry_flag) @@ -394,6 +502,7 @@ def test_cooldown_is_set(monkeypatch: pytest.MonkeyPatch, telemetry_flag: str) - assert dragon_backend.cooldown_period == expected_cooldown +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_heartbeat_and_time(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) first_heartbeat = dragon_backend.last_heartbeat @@ -402,6 +511,7 @@ def test_heartbeat_and_time(monkeypatch: pytest.MonkeyPatch) -> None: assert dragon_backend.last_heartbeat > first_heartbeat +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") @pytest.mark.parametrize("num_nodes", [1, 3, 100]) def test_can_honor(monkeypatch: pytest.MonkeyPatch, num_nodes: int) -> None: dragon_backend = get_mock_backend(monkeypatch) @@ -422,6 +532,119 @@ def test_can_honor(monkeypatch: pytest.MonkeyPatch, num_nodes: int) -> None: ) +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +@pytest.mark.parametrize("affinity", [[0], [0, 1], list(range(8))]) +def test_can_honor_cpu_affinity( + monkeypatch: pytest.MonkeyPatch, affinity: 
t.List[int] +) -> None: + """Verify that valid CPU affinities are accepted""" + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(cpu_affinity=affinity), + ) + + assert dragon_backend._can_honor(run_req)[0] + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_can_honor_cpu_affinity_out_of_range(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that invalid CPU affinities are NOT accepted + NOTE: negative values are captured by the Pydantic schema""" + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(cpu_affinity=list(range(9))), + ) + + assert not dragon_backend._can_honor(run_req)[0] + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +@pytest.mark.parametrize("affinity", [[0], [0, 1]]) +def test_can_honor_gpu_affinity( + monkeypatch: pytest.MonkeyPatch, affinity: t.List[int] +) -> None: + """Verify that valid GPU affinities are accepted""" + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(gpu_affinity=affinity), + ) + + assert dragon_backend._can_honor(run_req)[0] + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_can_honor_gpu_affinity_out_of_range(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that invalid GPU affinities are NOT accepted + NOTE: negative values are captured by the Pydantic schema""" + dragon_backend = 
get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(gpu_affinity=list(range(3))), + ) + + assert not dragon_backend._can_honor(run_req)[0] + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") +def test_can_honor_gpu_device_not_available(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a request for a GPU if none exists is not accepted""" + + # create a mock node class that always reports no GPUs available + dragon_backend = get_mock_backend(monkeypatch, num_gpus=0) + + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + # specify GPU device w/no affinity + policy=DragonRunPolicy(gpu_affinity=[0]), + ) + + assert not dragon_backend._can_honor(run_req)[0] + + +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_get_id(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) step_id = next(dragon_backend._step_ids) @@ -430,6 +653,7 @@ def test_get_id(monkeypatch: pytest.MonkeyPatch) -> None: assert step_id != next(dragon_backend._step_ids) +@pytest.mark.skipif(not dragon_loaded, reason="Test is only for Dragon WLM systems") def test_view(monkeypatch: pytest.MonkeyPatch) -> None: dragon_backend = get_mock_backend(monkeypatch) set_mock_group_infos(monkeypatch, dragon_backend) @@ -437,17 +661,21 @@ def test_view(monkeypatch: pytest.MonkeyPatch) -> None: expected_message = textwrap.dedent(f"""\ Dragon server backend update - | Host | Status | - |---------|----------| + | Host | Status | + |--------|----------| | {hosts[0]} | Busy | | {hosts[1]} | Free | | {hosts[2]} | Free | | Step | Status | Hosts | Return codes | Num procs | - 
|----------|--------------|-----------------|----------------|-------------| + |----------|--------------|-------------|----------------|-------------| | abc123-1 | Running | {hosts[0]} | | 1 | | del999-2 | Cancelled | {hosts[1]} | -9 | 1 | | c101vz-3 | Completed | {hosts[1]},{hosts[2]} | 0 | 2 | | 0ghjk1-4 | Failed | {hosts[2]} | -1 | 1 | | ljace0-5 | NeverStarted | | | 0 |""") - assert dragon_backend.status_message == expected_message + # get rid of white space to make the comparison easier + actual_msg = dragon_backend.status_message.replace(" ", "") + expected_message = expected_message.replace(" ", "") + + assert actual_msg == expected_message diff --git a/tests/test_dragon_run_request_nowlm.py b/tests/test_dragon_run_request_nowlm.py new file mode 100644 index 000000000..3dd7099c8 --- /dev/null +++ b/tests/test_dragon_run_request_nowlm.py @@ -0,0 +1,105 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest +from pydantic import ValidationError + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +from smartsim._core.schemas.dragonRequests import * +from smartsim._core.schemas.dragonResponses import * + + +def test_run_request_with_null_policy(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that an empty policy does not cause an error""" + # dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=None, + ) + assert run_req.policy is None + + +def test_run_request_with_empty_policy(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a non-empty policy is set correctly""" + # dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy(), + ) + assert run_req.policy is not None + assert not run_req.policy.cpu_affinity + assert not run_req.policy.gpu_affinity + + +@pytest.mark.parametrize( + "device,cpu_affinity,gpu_affinity", + [ + pytest.param("cpu", [-1], [], id="cpu_affinity"), + pytest.param("gpu", [], [-1], id="gpu_affinity"), + ], +) +def test_run_request_with_negative_affinity( + device: str, + cpu_affinity: 
t.List[int], + gpu_affinity: t.List[int], +) -> None: + """Verify that invalid affinity values fail validation""" + with pytest.raises(ValidationError) as ex: + DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + policy=DragonRunPolicy( + cpu_affinity=cpu_affinity, gpu_affinity=gpu_affinity + ), + ) + + assert f"{device}_affinity" in str(ex.value) + assert "greater than or equal to 0" in str(ex.value) diff --git a/tests/test_dragon_runsettings.py b/tests/test_dragon_runsettings.py new file mode 100644 index 000000000..34e8510e8 --- /dev/null +++ b/tests/test_dragon_runsettings.py @@ -0,0 +1,98 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pytest + +from smartsim.settings import DragonRunSettings + +# The tests in this file belong to the group_b group +pytestmark = pytest.mark.group_a + + +def test_dragon_runsettings_nodes(): + """Verify that node count is set correctly""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + exp_value = 3 + rs.set_nodes(exp_value) + assert rs.run_args["nodes"] == exp_value + + exp_value = 9 + rs.set_nodes(exp_value) + assert rs.run_args["nodes"] == exp_value + + +def test_dragon_runsettings_tasks_per_node(): + """Verify that tasks per node is set correctly""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + exp_value = 3 + rs.set_tasks_per_node(exp_value) + assert rs.run_args["tasks-per-node"] == exp_value + + exp_value = 7 + rs.set_tasks_per_node(exp_value) + assert rs.run_args["tasks-per-node"] == exp_value + + +def test_dragon_runsettings_cpu_affinity(): + """Verify that the CPU affinity is set correctly""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + exp_value = [0, 1, 2, 3] + rs.set_cpu_affinity([0, 1, 2, 3]) + assert rs.run_args["cpu-affinity"] == ",".join(str(val) for val in exp_value) + + # ensure the value is not changed when we extend the list + exp_value.extend([4, 5, 6]) + assert rs.run_args["cpu-affinity"] != ",".join(str(val) for val in exp_value) + + rs.set_cpu_affinity(exp_value) + assert rs.run_args["cpu-affinity"] == ",".join(str(val) for val in exp_value) + + # ensure the value is not 
changed when we extend the list + rs.run_args["cpu-affinity"] = "7,8,9" + assert rs.run_args["cpu-affinity"] != ",".join(str(val) for val in exp_value) + + +def test_dragon_runsettings_gpu_affinity(): + """Verify that the GPU affinity is set correctly""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + exp_value = [0, 1, 2, 3] + rs.set_gpu_affinity([0, 1, 2, 3]) + assert rs.run_args["gpu-affinity"] == ",".join(str(val) for val in exp_value) + + # ensure the value is not changed when we extend the list + exp_value.extend([4, 5, 6]) + assert rs.run_args["gpu-affinity"] != ",".join(str(val) for val in exp_value) + + rs.set_gpu_affinity(exp_value) + assert rs.run_args["gpu-affinity"] == ",".join(str(val) for val in exp_value) + + # ensure the value is not changed when we extend the list + rs.run_args["gpu-affinity"] = "7,8,9" + assert rs.run_args["gpu-affinity"] != ",".join(str(val) for val in exp_value) diff --git a/tests/test_dragon_step.py b/tests/test_dragon_step.py new file mode 100644 index 000000000..19f408e0b --- /dev/null +++ b/tests/test_dragon_step.py @@ -0,0 +1,394 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import json +import pathlib +import shutil +import sys +import typing as t + +import pytest + +from smartsim._core.launcher.step.dragonStep import DragonBatchStep, DragonStep +from smartsim.settings import DragonRunSettings +from smartsim.settings.pbsSettings import QsubBatchSettings +from smartsim.settings.slurmSettings import SbatchSettings + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +from smartsim._core.schemas.dragonRequests import * +from smartsim._core.schemas.dragonResponses import * + + +@pytest.fixture +def dragon_batch_step(test_dir: str) -> DragonBatchStep: + """Fixture for creating a default batch of steps for a dragon launcher""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / ".smartsim" / "logs").as_posix() + batch_step.meta["status_dir"] = status_dir + + # create some steps to verify the requests file output changes + rs0 = DragonRunSettings(exe="sleep", exe_args=["1"]) + rs1 = DragonRunSettings(exe="sleep", exe_args=["2"]) + rs2 = DragonRunSettings(exe="sleep", exe_args=["3"]) + rs3 = DragonRunSettings(exe="sleep", exe_args=["4"]) + + names = "test00", "test01", "test02", "test03" + settings = rs0, rs1, rs2, rs3 + + # create steps with: + # no 
affinity, cpu affinity only, gpu affinity only, cpu and gpu affinity + cpu_affinities = [[], [0, 1, 2], [], [3, 4, 5, 6]] + gpu_affinities = [[], [], [0, 1, 2], [3, 4, 5, 6]] + + # assign some unique affinities to each run setting instance + for index, rs in enumerate(settings): + if gpu_affinities[index]: + rs.set_node_feature("gpu") + rs.set_cpu_affinity(cpu_affinities[index]) + rs.set_gpu_affinity(gpu_affinities[index]) + + steps = list( + DragonStep(name_, test_dir, rs_) for name_, rs_ in zip(names, settings) + ) + + for index, step in enumerate(steps): + # ensure meta is configured... + step.meta["status_dir"] = status_dir + # ... and put all the steps into the batch + batch_step.add_to_batch(steps[index]) + + return batch_step + + +def get_request_path_from_batch_script(launch_cmd: t.List[str]) -> pathlib.Path: + """Helper method for finding the path to a request file from the launch command""" + script_path = pathlib.Path(launch_cmd[-1]) + batch_script = script_path.read_text(encoding="utf-8") + batch_statements = [line for line in batch_script.split("\n") if line] + entrypoint_cmd = batch_statements[-1] + requests_file = pathlib.Path(entrypoint_cmd.split()[-1]) + return requests_file + + +def test_dragon_step_creation(test_dir: str) -> None: + """Verify that the step is created with the values provided""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + original_name = "test" + step = DragonStep(original_name, test_dir, rs) + + # confirm the name has been made unique to avoid conflicts + assert step.name != original_name + assert step.entity_name == original_name + assert step.cwd == test_dir + assert step.step_settings is not None + + +def test_dragon_step_name_uniqueness(test_dir: str) -> None: + """Verify that step name is unique and independent of step content""" + + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + original_name = "test" + + num_steps = 100 + steps = [DragonStep(original_name, test_dir, rs) for _ in range(num_steps)] + + 
# confirm the name has been made unique in each step + step_names = {step.name for step in steps} + assert len(step_names) == num_steps + + +def test_dragon_step_launch_cmd(test_dir: str) -> None: + """Verify the expected launch cmd is generated w/minimal settings""" + exp_exe = "sleep" + exp_exe_args = "1" + rs = DragonRunSettings(exe=exp_exe, exe_args=[exp_exe_args]) + + original_name = "test" + step = DragonStep(original_name, test_dir, rs) + + launch_cmd = step.get_launch_cmd() + assert len(launch_cmd) == 2 + + # we'll verify the exe_args and exe name are handled correctly + exe, args = launch_cmd + assert exp_exe in exe + assert exp_exe_args in args + + # also, verify that a string exe_args param instead of list is handled correctly + exp_exe_args = "1 2 3" + rs = DragonRunSettings(exe=exp_exe, exe_args=exp_exe_args) + step = DragonStep(original_name, test_dir, rs) + launch_cmd = step.get_launch_cmd() + assert len(launch_cmd) == 4 # "/foo/bar/sleep 1 2 3" + + +def test_dragon_step_launch_cmd_multi_arg(test_dir: str) -> None: + """Verify the expected launch cmd is generated when multiple arguments + are passed to run settings""" + exp_exe = "sleep" + arg0, arg1, arg2 = "1", "2", "3" + rs = DragonRunSettings(exe=exp_exe, exe_args=[arg0, arg1, arg2]) + + original_name = "test" + + step = DragonStep(original_name, test_dir, rs) + + launch_cmd = step.get_launch_cmd() + assert len(launch_cmd) == 4 + + exe, *args = launch_cmd + assert exp_exe in exe + assert arg0 in args + assert arg1 in args + assert arg2 in args + + +def test_dragon_step_launch_cmd_no_bash( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Verify that requirement for bash shell is checked""" + exp_exe = "sleep" + arg0, arg1, arg2 = "1", "2", "3" + rs = DragonRunSettings(exe=exp_exe, exe_args=[arg0, arg1, arg2]) + rs.colocated_db_settings = {"foo": "bar"} # triggers bash lookup + + original_name = "test" + step = DragonStep(original_name, test_dir, rs) + + with 
pytest.raises(RuntimeError) as ex, monkeypatch.context() as ctx: + ctx.setattr(shutil, "which", lambda _: None) + step.get_launch_cmd() + + # verify the exception thrown is the one we're looking for + assert "Could not find" in ex.value.args[0] + + +def test_dragon_step_colocated_db() -> None: + # todo: implement a test for the branch where bash is found and + # run_settings.colocated_db_settings is set + ... + + +def test_dragon_step_container() -> None: + # todo: implement a test for the branch where run_settings.container + # is an instance of class `Singularity` + ... + + +def test_dragon_step_run_settings_accessor(test_dir: str) -> None: + """Verify the run settings passed to the step are copied correctly and + are not inadvertently modified outside the step""" + exp_exe = "sleep" + arg0, arg1, arg2 = "1", "2", "3" + rs = DragonRunSettings(exe=exp_exe, exe_args=[arg0, arg1, arg2]) + + original_name = "test" + step = DragonStep(original_name, test_dir, rs) + rs_output = step.run_settings + + assert rs.exe == rs_output.exe + assert rs.exe_args == rs_output.exe_args + + # ensure we have a deep copy + rs.exe = "foo" + assert id(step.run_settings) != id(rs) + assert step.run_settings.exe != rs.exe + + +def test_dragon_batch_step_creation(test_dir: str) -> None: + """Verify that the batch step is created with the values provided""" + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # confirm the name has been made unique to avoid conflicts + assert batch_step.name != batch_step_name + assert batch_step.entity_name == batch_step_name + assert batch_step.cwd == test_dir + assert batch_step.batch_settings is not None + assert batch_step.managed + + +def test_dragon_batch_step_add_to_batch(test_dir: str) -> None: + """Verify that steps are added to the batch correctly""" + rs = DragonRunSettings(exe="sleep", exe_args=["1"]) + + name0, name1, 
name2 = "test00", "test01", "test02" + step0 = DragonStep(name0, test_dir, rs) + step1 = DragonStep(name1, test_dir, rs) + step2 = DragonStep(name2, test_dir, rs) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + assert len(batch_step.steps) == 0 + + batch_step.add_to_batch(step0) + assert len(batch_step.steps) == 1 + assert name0 in ",".join({step.name for step in batch_step.steps}) + + batch_step.add_to_batch(step1) + assert len(batch_step.steps) == 2 + assert name1 in ",".join({step.name for step in batch_step.steps}) + + batch_step.add_to_batch(step2) + assert len(batch_step.steps) == 3 + assert name2 in ",".join({step.name for step in batch_step.steps}) + + +def test_dragon_batch_step_get_launch_command_meta_fail(test_dir: str) -> None: + """Verify that the batch launch command cannot be generated without + having the status directory set in the step metadata""" + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + with pytest.raises(KeyError) as ex: + batch_step.get_launch_cmd() + + +@pytest.mark.parametrize( + "batch_settings_class,batch_exe,batch_header,node_spec_tpl", + [ + pytest.param( + SbatchSettings, "sbatch", "#SBATCH", "#SBATCH --nodes={0}", id="sbatch" + ), + pytest.param(QsubBatchSettings, "qsub", "#PBS", "#PBS -l nodes={0}", id="qsub"), + ], +) +def test_dragon_batch_step_get_launch_command( + test_dir: str, + batch_settings_class: t.Type, + batch_exe: str, + batch_header: str, + node_spec_tpl: str, +) -> None: + """Verify that the batch launch command is properly generated and + the expected side effects are present (writing script file to disk)""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = batch_settings_class(nodes=num_nodes) + 
batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / ".smartsim" / "logs").as_posix() + batch_step.meta["status_dir"] = status_dir + + launch_cmd = batch_step.get_launch_cmd() + assert launch_cmd + + full_cmd = " ".join(launch_cmd) + assert batch_exe in full_cmd # verify launcher running the batch + assert test_dir in full_cmd # verify outputs are sent to expected directory + assert "batch_step.sh" in full_cmd # verify batch script name is in the command + + # ...verify that the script file is written when getting the launch command + script_path = pathlib.Path(launch_cmd[-1]) + assert script_path.exists() + assert len(script_path.read_bytes()) > 0 + + batch_script = script_path.read_text(encoding="utf-8") + + # ...verify the script file has the expected batch script header content + assert batch_header in batch_script + assert node_spec_tpl.format(num_nodes) in batch_script # verify node count is set + + # ...verify the script has the expected entrypoint command + batch_statements = [line for line in batch_script.split("\n") if line] + python_path = sys.executable + + entrypoint_cmd = batch_statements[-1] + assert python_path in entrypoint_cmd + assert "smartsim._core.entrypoints.dragon_client +submit" in entrypoint_cmd + + +def test_dragon_batch_step_write_request_file_no_steps(test_dir: str) -> None: + """Verify that the batch launch command writes an appropriate request file + if no steps are attached""" + test_path = pathlib.Path(test_dir) + + batch_step_name = "batch_step" + num_nodes = 4 + batch_settings = SbatchSettings(nodes=num_nodes) + batch_step = DragonBatchStep(batch_step_name, test_dir, batch_settings) + + # ensure the status_dir is set + status_dir = (test_path / ".smartsim" / "logs").as_posix() + batch_step.meta["status_dir"] = status_dir + + launch_cmd = batch_step.get_launch_cmd() + requests_file = get_request_path_from_batch_script(launch_cmd) + + # no steps have 
been added yet, so the requests file should be a serialized, empty list + assert requests_file.read_text(encoding="utf-8") == "[]" + + +def test_dragon_batch_step_write_request_file( + dragon_batch_step: DragonBatchStep, +) -> None: + """Verify that the batch launch command writes an appropriate request file + for the set of attached steps""" + # create steps with: + # no affinity, cpu affinity only, gpu affinity only, cpu and gpu affinity + cpu_affinities = [[], [0, 1, 2], [], [3, 4, 5, 6]] + gpu_affinities = [[], [], [0, 1, 2], [3, 4, 5, 6]] + + launch_cmd = dragon_batch_step.get_launch_cmd() + requests_file = get_request_path_from_batch_script(launch_cmd) + + requests_text = requests_file.read_text(encoding="utf-8") + requests_json: t.List[str] = json.loads(requests_text) + + # verify that there is an item in file for each step added to the batch + assert len(requests_json) == len(dragon_batch_step.steps) + + for index, req in enumerate(requests_json): + req_type, req_data = req.split("|", 1) + # the only steps added are to execute apps, requests should be of type "run" + assert req_type == "run" + + run_request = DragonRunRequest(**json.loads(req_data)) + assert run_request + assert run_request.policy.cpu_affinity == cpu_affinities[index] + assert run_request.policy.gpu_affinity == gpu_affinities[index] diff --git a/tests/test_manifest.py b/tests/test_manifest.py index c26868ebb..f4a1b0afb 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -26,6 +26,7 @@ import os.path +import typing as t from copy import deepcopy from uuid import uuid4 @@ -40,7 +41,9 @@ from smartsim._core.control.manifest import ( _LaunchedManifestMetadata as LaunchedManifestMetadata, ) +from smartsim._core.launcher.step import Step from smartsim.database import Orchestrator +from smartsim.entity import Ensemble, Model from smartsim.entity.dbobject import DBModel, DBScript from smartsim.error import SmartSimError from smartsim.settings import RunSettings @@ -51,22 +54,33 @@ # 
---- create entities for testing -------- -rs = RunSettings("python", "sleep.py") +_EntityResult = t.Tuple[ + Experiment, t.Tuple[Model, Model], Ensemble, Orchestrator, DBModel, DBScript +] -exp = Experiment("util-test", launcher="local") -model = exp.create_model("model_1", run_settings=rs) -model_2 = exp.create_model("model_1", run_settings=rs) -ensemble = exp.create_ensemble("ensemble", run_settings=rs, replicas=1) -orc = Orchestrator() -orc_1 = deepcopy(orc) -orc_1.name = "orc2" +@pytest.fixture +def entities(test_dir: str) -> _EntityResult: + rs = RunSettings("python", "sleep.py") -db_script = DBScript("some-script", "def main():\n print('hello world')\n") -db_model = DBModel("some-model", "TORCH", b"some-model-bytes") + exp = Experiment("util-test", launcher="local", exp_path=test_dir) + model = exp.create_model("model_1", run_settings=rs) + model_2 = exp.create_model("model_1", run_settings=rs) + ensemble = exp.create_ensemble("ensemble", run_settings=rs, replicas=1) + orc = Orchestrator() + orc_1 = deepcopy(orc) + orc_1.name = "orc2" + + db_script = DBScript("some-script", "def main():\n print('hello world')\n") + db_model = DBModel("some-model", "TORCH", b"some-model-bytes") + + return exp, (model, model_2), ensemble, orc, db_model, db_script + + +def test_separate(entities: _EntityResult) -> None: + _, (model, _), ensemble, orc, _, _ = entities -def test_separate(): manifest = Manifest(model, ensemble, orc) assert manifest.models[0] == model assert len(manifest.models) == 1 @@ -75,24 +89,28 @@ def test_separate(): assert manifest.dbs[0] == orc -def test_separate_type(): +def test_separate_type() -> None: with pytest.raises(TypeError): - _ = Manifest([1, 2, 3]) + _ = Manifest([1, 2, 3]) # type: ignore -def test_name_collision(): +def test_name_collision(entities: _EntityResult) -> None: + _, (model, model_2), _, _, _, _ = entities + with pytest.raises(SmartSimError): _ = Manifest(model, model_2) -def test_catch_empty_ensemble(): +def 
test_catch_empty_ensemble(entities: _EntityResult) -> None: + _, _, ensemble, _, _, _ = entities + e = deepcopy(ensemble) e.entities = [] with pytest.raises(ValueError): _ = Manifest(e) -def test_corner_case(): +def test_corner_case() -> None: """tricky corner case where some variable may have a name attribute """ @@ -102,59 +120,77 @@ class Person: p = Person() with pytest.raises(TypeError): - _ = Manifest(p) + _ = Manifest(p) # type: ignore @pytest.mark.parametrize( - "patch, has_db_objects", + "target_obj, target_prop, target_value, has_db_objects", [ - pytest.param((), False, id="No DB Objects"), - pytest.param((model, "_db_models", [db_model]), True, id="Model w/ DB Model"), - pytest.param( - (model, "_db_scripts", [db_script]), True, id="Model w/ DB Script" - ), - pytest.param( - (ensemble, "_db_models", [db_model]), True, id="Ensemble w/ DB Model" - ), - pytest.param( - (ensemble, "_db_scripts", [db_script]), True, id="Ensemble w/ DB Script" - ), - pytest.param( - (ensemble.entities[0], "_db_models", [db_model]), - True, - id="Ensemble Member w/ DB Model", - ), - pytest.param( - (ensemble.entities[0], "_db_scripts", [db_script]), - True, - id="Ensemble Member w/ DB Script", - ), + pytest.param(None, None, None, False, id="No DB Objects"), + pytest.param("m0", "dbm", "dbm", True, id="Model w/ DB Model"), + pytest.param("m0", "dbs", "dbs", True, id="Model w/ DB Script"), + pytest.param("ens", "dbm", "dbm", True, id="Ensemble w/ DB Model"), + pytest.param("ens", "dbs", "dbs", True, id="Ensemble w/ DB Script"), + pytest.param("ens_0", "dbm", "dbm", True, id="Ensemble Member w/ DB Model"), + pytest.param("ens_0", "dbs", "dbs", True, id="Ensemble Member w/ DB Script"), ], ) -def test_manifest_detects_db_objects(monkeypatch, patch, has_db_objects): - if patch: +def test_manifest_detects_db_objects( + monkeypatch: pytest.MonkeyPatch, + target_obj: str, + target_prop: str, + target_value: str, + has_db_objects: bool, + entities: _EntityResult, +) -> None: + _, 
(model, _), ensemble, _, db_model, db_script = entities + target_map = { + "m0": model, + "dbm": db_model, + "dbs": db_script, + "ens": ensemble, + "ens_0": ensemble.entities[0], + } + prop_map = { + "dbm": "_db_models", + "dbs": "_db_scripts", + } + if target_obj: + patch = ( + target_map[target_obj], + prop_map[target_prop], + [target_map[target_value]], + ) monkeypatch.setattr(*patch) + assert Manifest(model, ensemble).has_db_objects == has_db_objects -def test_launched_manifest_transform_data(): +def test_launched_manifest_transform_data(entities: _EntityResult) -> None: + _, (model, model_2), ensemble, orc, _, _ = entities + models = [(model, 1), (model_2, 2)] ensembles = [(ensemble, [(m, i) for i, m in enumerate(ensemble.entities)])] dbs = [(orc, [(n, i) for i, n in enumerate(orc.entities)])] - launched = LaunchedManifest( + lmb = LaunchedManifest( metadata=LaunchedManifestMetadata("name", "path", "launcher", "run_id"), - models=models, - ensembles=ensembles, - databases=dbs, + models=models, # type: ignore + ensembles=ensembles, # type: ignore + databases=dbs, # type: ignore ) - transformed = launched.map(lambda x: str(x)) + transformed = lmb.map(lambda x: str(x)) + assert transformed.models == tuple((m, str(i)) for m, i in models) assert transformed.ensembles[0][1] == tuple((m, str(i)) for m, i in ensembles[0][1]) assert transformed.databases[0][1] == tuple((n, str(i)) for n, i in dbs[0][1]) -def test_launched_manifest_builder_correctly_maps_data(): - lmb = LaunchedManifestBuilder("name", "path", "launcher name", str(uuid4())) +def test_launched_manifest_builder_correctly_maps_data(entities: _EntityResult) -> None: + _, (model, model_2), ensemble, orc, _, _ = entities + + lmb = LaunchedManifestBuilder( + "name", "path", "launcher name", str(uuid4()) + ) # type: ignore lmb.add_model(model, 1) lmb.add_model(model_2, 1) lmb.add_ensemble(ensemble, [i for i in range(len(ensemble.entities))]) @@ -166,8 +202,14 @@ def 
test_launched_manifest_builder_correctly_maps_data(): assert len(manifest.databases) == 1 -def test_launced_manifest_builder_raises_if_lens_do_not_match(): - lmb = LaunchedManifestBuilder("name", "path", "launcher name", str(uuid4())) +def test_launced_manifest_builder_raises_if_lens_do_not_match( + entities: _EntityResult, +) -> None: + _, _, ensemble, orc, _, _ = entities + + lmb = LaunchedManifestBuilder( + "name", "path", "launcher name", str(uuid4()) + ) # type: ignore with pytest.raises(ValueError): lmb.add_ensemble(ensemble, list(range(123))) with pytest.raises(ValueError): @@ -175,17 +217,23 @@ def test_launced_manifest_builder_raises_if_lens_do_not_match(): def test_launched_manifest_builer_raises_if_attaching_data_to_empty_collection( - monkeypatch, -): - lmb = LaunchedManifestBuilder("name", "path", "launcher", str(uuid4())) + monkeypatch: pytest.MonkeyPatch, entities: _EntityResult +) -> None: + _, _, ensemble, _, _, _ = entities + + lmb: LaunchedManifestBuilder[t.Tuple[str, Step]] = LaunchedManifestBuilder( + "name", "path", "launcher", str(uuid4()) + ) monkeypatch.setattr(ensemble, "entities", []) with pytest.raises(ValueError): lmb.add_ensemble(ensemble, []) -def test_lmb_and_launched_manifest_have_same_paths_for_launched_metadata(): +def test_lmb_and_launched_manifest_have_same_paths_for_launched_metadata() -> None: exp_path = "/path/to/some/exp" - lmb = LaunchedManifestBuilder("exp_name", exp_path, "launcher", str(uuid4())) + lmb: LaunchedManifestBuilder[t.Tuple[str, Step]] = LaunchedManifestBuilder( + "exp_name", exp_path, "launcher", str(uuid4()) + ) manifest = lmb.finalize() assert ( lmb.exp_telemetry_subdirectory == manifest.metadata.exp_telemetry_subdirectory diff --git a/tests/test_model.py b/tests/test_model.py index 64a68b299..152ce2058 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -26,12 +26,14 @@ from uuid import uuid4 +import numpy as np import pytest from smartsim import Experiment from smartsim._core.control.manifest 
import LaunchedManifestBuilder from smartsim._core.launcher.step import SbatchStep, SrunStep from smartsim.entity import Ensemble, Model +from smartsim.entity.model import _parse_model_parameters from smartsim.error import EntityExistsError, SSUnsupportedError from smartsim.settings import RunSettings, SbatchSettings, SrunSettings from smartsim.settings.mpiSettings import _BaseMPISettings @@ -176,3 +178,16 @@ def test_models_batch_settings_are_ignored_in_ensemble( step_cmd = step.step_cmds[0] assert any("srun" in tok for tok in step_cmd) # call the model using run settings assert not any("sbatch" in tok for tok in step_cmd) # no sbatch in sbatch + + +@pytest.mark.parametrize("dtype", [int, np.float32, str]) +def test_good_model_params(dtype): + print(dtype(0.6)) + params = {"foo": dtype(0.6)} + assert all(isinstance(val, str) for val in _parse_model_parameters(params).values()) + + +@pytest.mark.parametrize("bad_val", [["eggs"], {"n": 5}, object]) +def test_bad_model_params(bad_val): + with pytest.raises(TypeError): + _parse_model_parameters({"foo": bad_val}) diff --git a/tests/test_preview.py b/tests/test_preview.py index 3c7bed6fe..a18d10728 100644 --- a/tests/test_preview.py +++ b/tests/test_preview.py @@ -357,7 +357,7 @@ def test_model_preview_properties(test_dir, wlmutils): assert hw_rs == hello_world_model.run_settings.exe_args[0] assert None == hello_world_model.batch_settings assert "port" in list(hello_world_model.params.items())[0] - assert hw_port in list(hello_world_model.params.items())[0] + assert str(hw_port) in list(hello_world_model.params.items())[0] assert "password" in list(hello_world_model.params.items())[1] assert hw_password in list(hello_world_model.params.items())[1] diff --git a/tests/test_sge_batch_settings.py b/tests/test_sge_batch_settings.py new file mode 100644 index 000000000..fa40b4b00 --- /dev/null +++ b/tests/test_sge_batch_settings.py @@ -0,0 +1,158 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard 
Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os.path as osp + +import pytest + +from smartsim import Experiment +from smartsim._core.launcher.sge.sgeParser import parse_qstat_jobid_xml +from smartsim.error import SSConfigError +from smartsim.settings import SgeQsubBatchSettings +from smartsim.settings.mpiSettings import _BaseMPISettings + +# The tests in this file belong to the group_b group +pytestmark = pytest.mark.group_b + +qstat_example = """ + + + + 1387693 + 3.50000 + test_1 + user1 + r + 2024-06-06T04:04:21 + example_node1 + 1600 + + + + + 1387695 + 3.48917 + test_2 + user1 + qw + 2024-05-20T16:47:46 + + 1600 + + + +""" + + +@pytest.mark.parametrize("pe_type", ["mpi", "smp"]) +def test_pe_config(pe_type): + settings = SgeQsubBatchSettings(ncpus=8, pe_type=pe_type) + assert settings._create_resource_list() == [f"-pe {pe_type} 8"] + + +def test_walltime(): + settings = SgeQsubBatchSettings(time="01:00:00") + assert settings._create_resource_list() == [ + f"-l h_rt=01:00:00", + ] + + +def test_ngpus(): + settings = SgeQsubBatchSettings() + settings.set_ngpus(1) + assert settings._create_resource_list() == [f"-l gpu=1"] + + +def test_account(): + settings = SgeQsubBatchSettings(account="foo") + assert settings.format_batch_args() == ["-A foo"] + + +def test_project(): + settings = SgeQsubBatchSettings() + settings.set_project("foo") + assert settings.format_batch_args() == ["-P foo"] + + +def test_update_context_variables(): + settings = SgeQsubBatchSettings() + settings.update_context_variables("ac", "foo") + settings.update_context_variables("sc", "foo", "bar") + settings.update_context_variables("dc", "foo") + assert settings._create_resource_list() == ["-ac foo", "-sc foo=bar", "-dc foo"] + + +def test_invalid_dc_and_value_update_context_variables(): + settings = SgeQsubBatchSettings() + with pytest.raises(SSConfigError): + settings.update_context_variables("dc", "foo", "bar") + + +@pytest.mark.parametrize("enable", [True, False]) +def test_set_hyperthreading(enable): + settings = 
SgeQsubBatchSettings() + settings.set_hyperthreading(enable) + assert settings._create_resource_list() == [f"-l threads={int(enable)}"] + + +def test_default_set_hyperthreading(): + settings = SgeQsubBatchSettings() + settings.set_hyperthreading() + assert settings._create_resource_list() == ["-l threads=1"] + + +def test_resources_is_a_copy(): + settings = SgeQsubBatchSettings() + resources = settings.resources + assert resources is not settings._resources + + +def test_resources_not_set_on_error(): + settings = SgeQsubBatchSettings() + unaltered_resources = settings.resources + with pytest.raises(TypeError): + settings.resources = {"meep": Exception} + + assert unaltered_resources == settings.resources + + +def test_qstat_jobid_xml(): + assert parse_qstat_jobid_xml(qstat_example, "1387693") == "r" + assert parse_qstat_jobid_xml(qstat_example, "1387695") == "qw" + assert parse_qstat_jobid_xml(qstat_example, "9999999") is None + + +def test_sge_launcher_defaults(monkeypatch, fileutils): + + stub_path = osp.join("mpi_impl_stubs", "openmpi4") + stub_path = fileutils.get_test_dir_path(stub_path) + monkeypatch.setenv("PATH", stub_path, prepend=":") + exp = Experiment("test_sge_run_settings", launcher="sge") + + bs = exp.create_batch_settings() + assert isinstance(bs, SgeQsubBatchSettings) + rs = exp.create_run_settings("echo") + assert isinstance(rs, _BaseMPISettings) diff --git a/tests/test_shell_util.py b/tests/test_shell_util.py index 24f6b023c..2c4e19001 100644 --- a/tests/test_shell_util.py +++ b/tests/test_shell_util.py @@ -28,7 +28,7 @@ import psutil import pytest -from smartsim._core.launcher.util.shell import * +from smartsim._core.utils.shell import * # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b