Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{ai}[gfbf/2024a] jax v0.4.34, ml_dtypes v0.5.0 w/ CUDA 12.6.0 WIP #21924

Open
wants to merge 23 commits into
base: develop
Choose a base branch
from

Conversation

ThomasHoffmann77
Copy link
Contributor

@ThomasHoffmann77 ThomasHoffmann77 commented Nov 28, 2024

(created using eb --new-pr)
requires:

TODO:

  • Bazel downloads
  • Use Bazel 7.4.1
  • Tests
  • (Plugins ? )

… jax-0.4.35_easyblock_compat.patch, jax-0.4.35_fix-pybind11-systemlib_cupti.patch
Copy link

github-actions bot commented Nov 28, 2024

Updated software jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb

Diff against jax-0.4.25-gfbf-2023a.eb

easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a.eb

diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
index a4a86382ad..8e35680c17 100644
--- a/easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a.eb
+++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
@@ -6,54 +6,77 @@
 easyblock = 'PythonBundle'
 
 name = 'jax'
-version = '0.4.25'
+version = '0.4.34'
+versionsuffix = '-CUDA-%(cudaver)s'
 
 homepage = 'https://jax.readthedocs.io/'
 description = """Composable transformations of Python+NumPy programs:
 differentiate, vectorize, JIT to GPU/TPU, and more"""
 
-toolchain = {'name': 'gfbf', 'version': '2023a'}
+toolchain = {'name': 'gfbf', 'version': '2024a'}
+cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"]
 
 builddependencies = [
-    ('Bazel', '6.3.1'),
-    ('pytest-xdist', '3.3.1'),
-    ('git', '2.41.0', '-nodocs'),  # bazel uses git to fetch repositories
-    ('matplotlib', '3.7.2'),  # required for tests/lobpcg_test.py
-    ('poetry', '1.5.1'),
-    ('pybind11', '2.11.1'),
+    # ('Bazel', '7.4.1'),  TODO: problems with @@local_config_python//:py3_runtime:
+    # Error in fail: interpreter_path must be an absolute path
+    # Bazel 6.5.0 (download) works.
+    ('pybind11', '2.13.6'),
+    ('pytest-xdist', '3.6.1'),
+    ('git', '2.45.1'),  # bazel uses git to fetch repositories
+    ('matplotlib', '3.9.2'),  # required for tests/lobpcg_test.py
+    ('poetry', '1.8.3'),
+    ('Clang', '18.1.8', versionsuffix)
 ]
 
 dependencies = [
-    ('Python', '3.11.3'),
-    ('SciPy-bundle', '2023.07'),
+    ('CUDA', '12.6.0', '', SYSTEM),  # 12.6.2 ?
+    ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM),
+    ('NCCL', '2.22.3', versionsuffix),
+    ('Python', '3.12.3'),
+    ('SciPy-bundle', '2024.05'),  # 2024.11 ?
     ('absl-py', '2.1.0'),
-    ('flatbuffers-python', '23.5.26'),
-    ('ml_dtypes', '0.3.2'),
-    ('zlib', '1.2.13'),
+    ('flatbuffers-python', '24.3.25'),
+    ('ml_dtypes', '0.5.0'),
+    ('zlib', '1.3.1'),
 ]
 
 # downloading xla and other tarballs to avoid that Bazel downloads it during the build
 local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives'
 # note: following commits *must* be the exact same onces used upstream
 # XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl
-local_xla_commit = '4ccfe33c71665ddcbca5b127fefe8baa3ed632d4'
+local_xla_commit = 'cd6e808c59f53b40a99df1f1b860db9a3e598bff'
 # TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl
-local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25'
+local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25'  # TODO: still required?
+# TODO: add other downloads
 
 # Use sources downloaded by EasyBuild
 _jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" '
 # Use dependencies from EasyBuild
 _jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" '
-_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include" '
+_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" '
 # Avoid warning (treated as error) in upb/table.c
-_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" '
+_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" '  # TODO: still required?
+# _jaxlib_buildopts += '--nouse_clang '  #TODO: avoid clang (?)
+_jaxlib_buildopts += '--cuda_version=%(cudaver)s '
+_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 '
+# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include";
+# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include <cupti.h>): 
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """
+_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """
+
+# get rid of .devDate versionsuffix:  TODO: find a better way
+# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """  does not work (?)
+_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """
+_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """  # required?
 
 components = [
     ('jaxlib', version, {
         'sources': [
             {
                 'source_urls': ['https://github.com/google/jax/archive/'],
-                'filename': '%(name)s-v%(version)s.tar.gz',
+                'filename': 'jax-v%(version)s.tar.gz',
             },
             {
                 'source_urls': ['https://github.com/openxla/xla/archive'],
@@ -68,39 +91,71 @@ components = [
                 'extract_cmd': local_extract_cmd,
             },
         ],
-        'patches': ['jax-0.4.25_fix-pybind11-systemlib.patch'],
+        'patches': [
+            'jax-0.4.35_easyblock_compat.patch',
+            'jax-0.4.35_fix-pybind11-systemlib_cupti.patch',
+            'jax-0.4.35_version.patch',
+        ],
         'checksums': [
-            {'jaxlib-v0.4.25.tar.gz':
-             'fc1197c401924942eb14185a61688d0c476e3e81ff71f9dc95e620b57c06eec8'},
-            {'xla-4ccfe33c.tar.gz':
-             '8a59b9af7d0850059d7043f7043c780066d61538f3af536e8a10d3d717f35089'},
+            {'jax-v0.4.34.tar.gz':
+             'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'xla-cd6e808c.tar.gz':
+             '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'},
             {'tf_runtime-0aeefb16.tar.gz':
              'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'},
-            {'jax-0.4.25_fix-pybind11-systemlib.patch':
-             'daad5b726d1a138431b05eb60ecf4c89c7b5148eb939721800bdf43d804ca033'},
+            {'jax-0.4.35_easyblock_compat.patch':
+             'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'},
+            {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch':
+             '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'},
+            {'jax-0.4.35_version.patch':
+             'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'start_dir': 'jax-jaxlib-v%(version)s',
-        'buildopts': _jaxlib_buildopts
+        'start_dir': 'jax-jax-v%(version)s',
+        'buildopts': _jaxlib_buildopts,
+        'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' +
+                        'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' +
+                        _no_devtag
     }),
 ]
 
+# Some tests require an isolated run:  TODO: still required?
+local_isolated_tests = [
+    'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp',
+    'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc',
+    'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' +
+    '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32',
+]
+# deliberately not testing in parallel, as that results in (additional) failing tests;
+# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing,
+# see https://github.com/google/jax/issues/7323 and
+# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst;
+# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs;
+# use NVIDIA_TF32_OVERRIDE=0 to disable TF32 Tensor Cores and avoid losing numerical precision;
+local_test_exports = [
+    "NVIDIA_TF32_OVERRIDE=0",
+    "CUDA_VISIBLE_DEVICES=0",
+    "XLA_PYTHON_CLIENT_ALLOCATOR=platform",
+    "JAX_ENABLE_X64=true",
+]
+local_test = ''.join(['export %s;' % x for x in local_test_exports])
+# run all tests at once except for local_isolated_tests:
+local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests])
+# run remaining local_isolated_tests separately: 
+local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests])
+
 use_pip = True
 
 exts_list = [
     (name, version, {
-        'sources': [
-            {
-                'source_urls': ['https://github.com/google/jax/archive/'],
-                'filename': '%(name)s-v%(version)s.tar.gz',
-            },
-        ],
-        'patches': ['jax-0.4.25_fix_env_test_no_log_spam.patch'],
+        'patches': ['jax-0.4.35_version.patch'],
+        'preinstallopts': _no_devtag,
+        'runtest': False,
+        'source_tmpl': '%(name)s-v%(version)s.tar.gz',
+        'source_urls': ['https://github.com/google/jax/archive/'],
         'checksums': [
-            {'jax-v0.4.25.tar.gz': '8b30af49688c0c13b82c6f5ce992727c00b5fc6d04a4c6962012f4246fa664eb'},
-            {'jax-0.4.25_fix_env_test_no_log_spam.patch':
-             'a18b5f147569d9ad41025124333a0f04fd0d0e0f9e4309658d7f6b9b838e2e2a'},
+            {'jax-v0.4.34.tar.gz': 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'runtest': "pytest -n %(parallel)s tests",
     }),
 ]
 
Diff against jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb

easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb

diff --git a/easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
index 75355c6c84..8e35680c17 100644
--- a/easybuild/easyconfigs/j/jax/jax-0.4.25-gfbf-2023a-CUDA-12.1.1.eb
+++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
@@ -6,59 +6,77 @@
 easyblock = 'PythonBundle'
 
 name = 'jax'
-version = '0.4.25'
+version = '0.4.34'
 versionsuffix = '-CUDA-%(cudaver)s'
 
 homepage = 'https://jax.readthedocs.io/'
 description = """Composable transformations of Python+NumPy programs:
 differentiate, vectorize, JIT to GPU/TPU, and more"""
 
-toolchain = {'name': 'gfbf', 'version': '2023a'}
+toolchain = {'name': 'gfbf', 'version': '2024a'}
 cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"]
 
 builddependencies = [
-    ('Bazel', '6.3.1'),
-    ('pytest-xdist', '3.3.1'),
-    ('git', '2.41.0', '-nodocs'),  # bazel uses git to fetch repositories
-    ('matplotlib', '3.7.2'),  # required for tests/lobpcg_test.py
-    ('poetry', '1.5.1'),
-    ('pybind11', '2.11.1'),
+    # ('Bazel', '7.4.1'),  TODO: problems with @@local_config_python//:py3_runtime:
+    # Error in fail: interpreter_path must be an absolute path
+    # Bazel 6.5.0 (download) works.
+    ('pybind11', '2.13.6'),
+    ('pytest-xdist', '3.6.1'),
+    ('git', '2.45.1'),  # bazel uses git to fetch repositories
+    ('matplotlib', '3.9.2'),  # required for tests/lobpcg_test.py
+    ('poetry', '1.8.3'),
+    ('Clang', '18.1.8', versionsuffix)
 ]
 
 dependencies = [
-    ('CUDA', '12.1.1', '', SYSTEM),
-    ('cuDNN', '8.9.2.26', versionsuffix, SYSTEM),
-    ('NCCL', '2.18.3', versionsuffix),
-    ('Python', '3.11.3'),
-    ('SciPy-bundle', '2023.07'),
+    ('CUDA', '12.6.0', '', SYSTEM),  # 12.6.2 ?
+    ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM),
+    ('NCCL', '2.22.3', versionsuffix),
+    ('Python', '3.12.3'),
+    ('SciPy-bundle', '2024.05'),  # 2024.11 ?
     ('absl-py', '2.1.0'),
-    ('flatbuffers-python', '23.5.26'),
-    ('ml_dtypes', '0.3.2'),
-    ('zlib', '1.2.13'),
+    ('flatbuffers-python', '24.3.25'),
+    ('ml_dtypes', '0.5.0'),
+    ('zlib', '1.3.1'),
 ]
 
 # downloading xla and other tarballs to avoid that Bazel downloads it during the build
 local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives'
 # note: following commits *must* be the exact same onces used upstream
 # XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl
-local_xla_commit = '4ccfe33c71665ddcbca5b127fefe8baa3ed632d4'
+local_xla_commit = 'cd6e808c59f53b40a99df1f1b860db9a3e598bff'
 # TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl
-local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25'
+local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25'  # TODO: still required?
+# TODO: add other downloads
 
 # Use sources downloaded by EasyBuild
 _jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" '
 # Use dependencies from EasyBuild
 _jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" '
-_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include" '
+_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" '
 # Avoid warning (treated as error) in upb/table.c
-_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" '
+_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" '  # TODO: still required?
+# _jaxlib_buildopts += '--nouse_clang '  #TODO: avoid clang (?)
+_jaxlib_buildopts += '--cuda_version=%(cudaver)s '
+_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 '
+# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include";
+# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include <cupti.h>): 
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """
+_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """
+
+# get rid of .devDate versionsuffix:  TODO: find a better way
+# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """  does not work (?)
+_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """
+_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """  # required?
 
 components = [
     ('jaxlib', version, {
         'sources': [
             {
                 'source_urls': ['https://github.com/google/jax/archive/'],
-                'filename': '%(name)s-v%(version)s.tar.gz',
+                'filename': 'jax-v%(version)s.tar.gz',
             },
             {
                 'source_urls': ['https://github.com/openxla/xla/archive'],
@@ -73,23 +91,34 @@ components = [
                 'extract_cmd': local_extract_cmd,
             },
         ],
-        'patches': ['jax-0.4.25_fix-pybind11-systemlib.patch'],
+        'patches': [
+            'jax-0.4.35_easyblock_compat.patch',
+            'jax-0.4.35_fix-pybind11-systemlib_cupti.patch',
+            'jax-0.4.35_version.patch',
+        ],
         'checksums': [
-            {'jaxlib-v0.4.25.tar.gz':
-             'fc1197c401924942eb14185a61688d0c476e3e81ff71f9dc95e620b57c06eec8'},
-            {'xla-4ccfe33c.tar.gz':
-             '8a59b9af7d0850059d7043f7043c780066d61538f3af536e8a10d3d717f35089'},
+            {'jax-v0.4.34.tar.gz':
+             'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'xla-cd6e808c.tar.gz':
+             '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'},
             {'tf_runtime-0aeefb16.tar.gz':
              'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'},
-            {'jax-0.4.25_fix-pybind11-systemlib.patch':
-             'daad5b726d1a138431b05eb60ecf4c89c7b5148eb939721800bdf43d804ca033'},
+            {'jax-0.4.35_easyblock_compat.patch':
+             'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'},
+            {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch':
+             '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'},
+            {'jax-0.4.35_version.patch':
+             'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'start_dir': 'jax-jaxlib-v%(version)s',
-        'buildopts': _jaxlib_buildopts
+        'start_dir': 'jax-jax-v%(version)s',
+        'buildopts': _jaxlib_buildopts,
+        'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' +
+                        'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' +
+                        _no_devtag
     }),
 ]
 
-# Some tests require an isolated run:
+# Some tests require an isolated run:  TODO: still required?
 local_isolated_tests = [
     'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp',
     'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc',
@@ -118,15 +147,15 @@ use_pip = True
 
 exts_list = [
     (name, version, {
+        'patches': ['jax-0.4.35_version.patch'],
+        'preinstallopts': _no_devtag,
+        'runtest': False,
         'source_tmpl': '%(name)s-v%(version)s.tar.gz',
         'source_urls': ['https://github.com/google/jax/archive/'],
-        'patches': ['jax-0.4.25_fix_env_test_no_log_spam.patch'],
         'checksums': [
-            {'jax-v0.4.25.tar.gz': '8b30af49688c0c13b82c6f5ce992727c00b5fc6d04a4c6962012f4246fa664eb'},
-            {'jax-0.4.25_fix_env_test_no_log_spam.patch':
-             'a18b5f147569d9ad41025124333a0f04fd0d0e0f9e4309658d7f6b9b838e2e2a'},
+            {'jax-v0.4.34.tar.gz': 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'runtest': local_test,
     }),
 ]
 
Diff against jax-0.3.25-foss-2022a.eb

easybuild/easyconfigs/j/jax/jax-0.3.25-foss-2022a.eb

diff --git a/easybuild/easyconfigs/j/jax/jax-0.3.25-foss-2022a.eb b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
index fb821e58e1..8e35680c17 100644
--- a/easybuild/easyconfigs/j/jax/jax-0.3.25-foss-2022a.eb
+++ b/easybuild/easyconfigs/j/jax/jax-0.4.34-gfbf-2024a-CUDA-12.6.0.eb
@@ -1,114 +1,164 @@
 # This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild
 # Author: Denis Kristak
 # Updated by: Alex Domingo (Vrije Universiteit Brussel)
+# Updated by: Pavel Tománek (INUITS)
+# Updated by: Thomas Hoffmann (EMBL Heidelberg)
 easyblock = 'PythonBundle'
 
 name = 'jax'
-version = '0.3.25'
+version = '0.4.34'
+versionsuffix = '-CUDA-%(cudaver)s'
 
-homepage = 'https://pypi.python.org/pypi/jax'
+homepage = 'https://jax.readthedocs.io/'
 description = """Composable transformations of Python+NumPy programs:
 differentiate, vectorize, JIT to GPU/TPU, and more"""
 
-toolchain = {'name': 'foss', 'version': '2022a'}
+toolchain = {'name': 'gfbf', 'version': '2024a'}
+cuda_compute_capabilities = ["5.0", "6.0", "6.1", "7.0", "7.5", "8.0", "8.6", "9.0"]
 
 builddependencies = [
-    ('Bazel', '5.1.1'),
-    ('pytest-xdist', '2.5.0'),
-    # git 2.x required to fetch repository 'io_bazel_rules_docker'
-    ('git', '2.36.0', '-nodocs'),
-    ('matplotlib', '3.5.2'),  # required for tests/lobpcg_test.py
+    # ('Bazel', '7.4.1'),  TODO: problems with @@local_config_python//:py3_runtime:
+    # Error in fail: interpreter_path must be an absolute path
+    # Bazel 6.5.0 (download) works.
+    ('pybind11', '2.13.6'),
+    ('pytest-xdist', '3.6.1'),
+    ('git', '2.45.1'),  # bazel uses git to fetch repositories
+    ('matplotlib', '3.9.2'),  # required for tests/lobpcg_test.py
+    ('poetry', '1.8.3'),
+    ('Clang', '18.1.8', versionsuffix)
 ]
 
 dependencies = [
-    ('Python', '3.10.4'),
-    ('SciPy-bundle', '2022.05'),
-    ('flatbuffers-python', '2.0'),
+    ('CUDA', '12.6.0', '', SYSTEM),  # 12.6.2 ?
+    ('cuDNN', '9.5.0.50', versionsuffix, SYSTEM),
+    ('NCCL', '2.22.3', versionsuffix),
+    ('Python', '3.12.3'),
+    ('SciPy-bundle', '2024.05'),  # 2024.11 ?
+    ('absl-py', '2.1.0'),
+    ('flatbuffers-python', '24.3.25'),
+    ('ml_dtypes', '0.5.0'),
+    ('zlib', '1.3.1'),
 ]
 
-# downloading TensorFlow tarball to avoid that Bazel downloads it during the build
-# note: this *must* be the exact same commit as used in WORKSPACE
-local_tf_commit = 'f0fe8d4c04fab1f157854a1aa3c136377901cdef'
-local_tf_dir = 'tensorflow-%s' % local_tf_commit
-local_tf_builddir = '%(builddir)s/' + local_tf_dir
+# downloading xla and other tarballs to avoid that Bazel downloads it during the build
+local_extract_cmd = 'mkdir -p %(builddir)s/archives && cp %s %(builddir)s/archives'
+# note: following commits *must* be the exact same onces used upstream
+# XLA_COMMIT from jax-jaxlib: third_party/xla/workspace.bzl
+local_xla_commit = 'cd6e808c59f53b40a99df1f1b860db9a3e598bff'
+# TFRT_COMMIT from xla: third_party/tsl/third_party/tf_runtime/workspace.bzl
+local_tfrt_commit = '0aeefb1660d7e37964b2bb71b1f518096bda9a25'  # TODO: still required?
+# TODO: add other downloads
 
-# replace remote TensorFlow repository with the local one from EB
-local_jax_prebuildopts = "sed -i -f jaxlib_local-tensorflow-repo.sed WORKSPACE && "
-local_jax_prebuildopts += "sed -i 's|EB_TF_REPOPATH|%s|' WORKSPACE && " % local_tf_builddir
+# Use sources downloaded by EasyBuild
+_jaxlib_buildopts = '--bazel_options="--distdir=%(builddir)s/archives" '
+# Use dependencies from EasyBuild
+_jaxlib_buildopts += '--bazel_options="--action_env=TF_SYSTEM_LIBS=pybind11" '
+_jaxlib_buildopts += '--bazel_options="--action_env=CPATH=$EBROOTPYBIND11/include:$EBROOTCUDA/extras/CUPTI/include" '
+# Avoid warning (treated as error) in upb/table.c
+_jaxlib_buildopts += '--bazel_options="--copt=-Wno-maybe-uninitialized" '  # TODO: still required?
+# _jaxlib_buildopts += '--nouse_clang '  #TODO: avoid clang (?)
+_jaxlib_buildopts += '--cuda_version=%(cudaver)s '
+_jaxlib_buildopts += '--python_bin_path=$EBROOTPYTHON/bin/python3 '
+# Do not use hermetic CUDA/cuDNN/NCCL: (requires action_env=CPATH=$EBROOTCUDA/extras/CUPTI/include";
+# requires patch of external/xla/xla/tsl/cuda/cupti_stub.cc and jaxlib/gpu/vendor.h (#include <cupti.h>): 
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDNN_PATH="$EBROOTCUDNN" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_NCCL_PATH="$EBROOTNCCL" """
+_jaxlib_buildopts += """--bazel_options=--repo_env=LOCAL_CUDA_PATH="$EBROOTCUDA" """
+_jaxlib_buildopts += """--bazel_options="--copt=-Ithird_party/gpus/cuda/extras/CUPTI/include" """
 
-use_pip = True
-
-default_easyblock = 'PythonPackage'
-default_component_specs = {
-    'sources': [SOURCE_TAR_GZ],
-    'source_urls': [PYPI_SOURCE],
-    'start_dir': '%(name)s-%(version)s',
-    'use_pip': True,
-    'sanity_pip_check': True,
-    'download_dep_fail': True,
-}
+# get rid of .devDate versionsuffix:  TODO: find a better way
+# _no_devtag = """ export JAX_RELEASE && export JAXLIB_RELEASE && """  does not work (?)
+_no_devtag = """ sed -i "s/version=__version__/version='%(version)s'/g" setup.py && """
+_jaxlib_buildopts += """--bazel_options="--action_env=JAXLIB_RELEASE=1" """  # required?
 
 components = [
-    ('absl-py', '1.3.0', {
-        'options': {'modulename': 'absl'},
-        'checksums': ['463c38a08d2e4cef6c498b76ba5bd4858e4c6ef51da1a5a1f27139a022e20248'],
-    }),
     ('jaxlib', version, {
         'sources': [
-            '%(name)s-v%(version)s.tar.gz',
             {
-                'download_filename': '%s.tar.gz' % local_tf_commit,
-                'filename': 'tensorflow-%s.tar.gz' % local_tf_commit,
-            }
-        ],
-        'source_urls': [
-            'https://github.com/google/jax/archive/',
-            'https://github.com/tensorflow/tensorflow/archive/'
+                'source_urls': ['https://github.com/google/jax/archive/'],
+                'filename': 'jax-v%(version)s.tar.gz',
+            },
+            {
+                'source_urls': ['https://github.com/openxla/xla/archive'],
+                'download_filename': '%s.tar.gz' % local_xla_commit,
+                'filename': 'xla-%s.tar.gz' % local_xla_commit[:8],
+                'extract_cmd': local_extract_cmd,
+            },
+            {
+                'source_urls': ['https://github.com/tensorflow/runtime/archive'],
+                'download_filename': '%s.tar.gz' % local_tfrt_commit,
+                'filename': 'tf_runtime-%s.tar.gz' % local_tfrt_commit[:8],
+                'extract_cmd': local_extract_cmd,
+            },
         ],
         'patches': [
-            ('jaxlib_local-tensorflow-repo.sed', '.'),
-            ('TensorFlow-2.7.0_cuda-noncanonical-include-paths.patch', '../' + local_tf_dir),
+            'jax-0.4.35_easyblock_compat.patch',
+            'jax-0.4.35_fix-pybind11-systemlib_cupti.patch',
+            'jax-0.4.35_version.patch',
         ],
         'checksums': [
-            # jaxlib-v0.3.25.tar.gz
-            '73ebc7868631cd9d520385557bbd7f08762d748a5a6a1bebef0f3b8d7ba748ef',
-            # tensorflow-f0fe8d4c04fab1f157854a1aa3c136377901cdef.tar.gz
-            '9ebba3031e8a81993682e4b9e43891ebb8480b6287e635df8e7efaa45ab5ede7',
-            # jaxlib_local-tensorflow-repo.sed
-            'abb5c3b97f4e317bce9f22ed3eeea3b9715365818d8b50720d937e2d41d5c4e5',
-            # TensorFlow-2.7.0_cuda-noncanonical-include-paths.patch
-            '0a759010c253d49755955cd5f028e75de4a4c447dcc8f5a0d9f47cce6881a9db',
+            {'jax-v0.4.34.tar.gz':
+             'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'xla-cd6e808c.tar.gz':
+             '65cb6d63ef4083b35775052636cb9c629f86db6947c8b91711923ba31dbdcde8'},
+            {'tf_runtime-0aeefb16.tar.gz':
+             'a3df827d7896774cb1d80bf4e1c79ab05c268f29bd4d3db1fb5a4b9c2079d8e3'},
+            {'jax-0.4.35_easyblock_compat.patch':
+             'cbf4ad92b8438c4ce2a975efce1c47c57d4c3b117bceee071ab660f964057223'},
+            {'jax-0.4.35_fix-pybind11-systemlib_cupti.patch':
+             '78efe6b5108a5da1935258286c94dea8438fd03651533c34023eeba27f514130'},
+            {'jax-0.4.35_version.patch':
+             'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'start_dir': 'jax-jaxlib-v%(version)s',
-        'prebuildopts': local_jax_prebuildopts,
+        'start_dir': 'jax-jax-v%(version)s',
+        'buildopts': _jaxlib_buildopts,
+        'prebuildopts': ' mkdir third_party/gpus/cuda/extras/ -p && ' +
+                        'ln -s $EBROOTCUDA/extras/CUPTI third_party/gpus/cuda/extras --relative &&' +
+                        _no_devtag
     }),
 ]
 
+# Some tests require an isolated run:  TODO: still required?
+local_isolated_tests = [
+    'tests/host_callback_test.py::HostCallbackTapTest::test_tap_scan_custom_jvp',
+    'tests/host_callback_test.py::HostCallbackTapTest::test_tap_transforms_doc',
+    'tests/lax_scipy_special_functions_test.py::LaxScipySpcialFunctionsTest' +
+    '::testScipySpecialFun_gammainc_s_2x1x4_float32_float32',
+]
+# deliberately not testing in parallel, as that results in (additional) failing tests;
+# use XLA_PYTHON_CLIENT_ALLOCATOR=platform to allocate and deallocate GPU memory during testing,
+# see https://github.com/google/jax/issues/7323 and
+# https://github.com/google/jax/blob/main/docs/gpu_memory_allocation.rst;
+# use CUDA_VISIBLE_DEVICES=0 to avoid failing tests on systems with multiple GPUs;
+# use NVIDIA_TF32_OVERRIDE=0 to disable TF32 Tensor Cores and avoid losing numerical precision;
+local_test_exports = [
+    "NVIDIA_TF32_OVERRIDE=0",
+    "CUDA_VISIBLE_DEVICES=0",
+    "XLA_PYTHON_CLIENT_ALLOCATOR=platform",
+    "JAX_ENABLE_X64=true",
+]
+local_test = ''.join(['export %s;' % x for x in local_test_exports])
+# run all tests at once except for local_isolated_tests:
+local_test += "pytest -vv tests %s && " % ' '.join(['--deselect %s' % x for x in local_isolated_tests])
+# run remaining local_isolated_tests separately: 
+local_test += ' && '.join(['pytest -vv %s' % x for x in local_isolated_tests])
+
+use_pip = True
+
 exts_list = [
-    ('opt_einsum', '3.3.0', {
-        'checksums': ['59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549'],
-    }),
-    ('etils', '0.8.0', {
-        'checksums': ['d1d5af7bd9c784a273c4e1eccfaa8feaca5e0481a08717b5313fa231da22a903'],
-    }),
     (name, version, {
+        'patches': ['jax-0.4.35_version.patch'],
+        'preinstallopts': _no_devtag,
+        'runtest': False,
         'source_tmpl': '%(name)s-v%(version)s.tar.gz',
         'source_urls': ['https://github.com/google/jax/archive/'],
-        'patches': [
-            'jax-0.3.23_relax-testPoly5-tolerance.patch',
-            'jax-0.3.25_skip-qdwh-test-rank-deficient-deficient.patch',
-        ],
         'checksums': [
-            {'jax-v0.3.25.tar.gz': '49e8ce88ddd7dd0de86116c9d75d98a577a9061377ec423493fbac5ea29f79f0'},
-            {'jax-0.3.23_relax-testPoly5-tolerance.patch':
-             'be64bf36dde4884a97b6c8bb22c6b14ab5b24033cd40bfe7ce18363c55c30e87'},
-            {'jax-0.3.25_skip-qdwh-test-rank-deficient-deficient.patch':
-             '70f16f2dba03ab162ce6e13ea61774524b485e9630209bbd4bec81fd16c8812f'},
+            {'jax-v0.4.34.tar.gz': 'd3a75ad667772309ade81350fa70c4a78028a920028800282e46d8383c0ee6bb'},
+            {'jax-0.4.35_version.patch': 'cd2139a7802abf14b4b2cecee331aed80fff2ef91e16fa105093aea0795455e8'},
         ],
-        'runtest': "pytest -n %(parallel)s tests",
     }),
 ]
 
 sanity_pip_check = True
 
-moduleclass = 'tools'
+moduleclass = 'ai'

Updated software ml_dtypes-0.5.0-gfbf-2024a.eb

Diff against ml_dtypes-0.3.2-gfbf-2023a.eb

easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.3.2-gfbf-2023a.eb

diff --git a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.3.2-gfbf-2023a.eb b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb
index 9c3a18bfdb..5a20f1d845 100644
--- a/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.3.2-gfbf-2023a.eb
+++ b/easybuild/easyconfigs/m/ml_dtypes/ml_dtypes-0.5.0-gfbf-2024a.eb
@@ -1,8 +1,8 @@
-# Thomas Hoffmann, EMBL Heidelberg, [email protected], 2024/02
+# Thomas Hoffmann, EMBL Heidelberg, [email protected], 2024/11
 easyblock = 'PythonBundle'
 
 name = 'ml_dtypes'
-version = '0.3.2'
+version = '0.5.0'
 
 homepage = 'https://github.com/jax-ml/ml_dtypes'
 description = """
@@ -18,11 +18,16 @@ float8_e5m2
 float8_e5m2fnuz
 """
 
-toolchain = {'name': 'gfbf', 'version': '2023a'}
+toolchain = {'name': 'gfbf', 'version': '2024a'}
+
+builddependencies = [
+    ('poetry', '1.8.3'),
+]
 
 dependencies = [
-    ('Python', '3.11.3'),
-    ('SciPy-bundle', '2023.07'),
+    ('Python', '3.12.3'),
+    # ('SciPy-bundle', '2024.11'), ?
+    ('SciPy-bundle', '2024.05'),
 ]
 
 
@@ -31,16 +36,16 @@ use_pip = True
 default_easyblock = 'PythonPackage'
 
 exts_list = [
-    ('opt_einsum', '3.3.0', {
-        'checksums': ['59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549'],
+    ('opt_einsum', '3.4.0', {
+        'checksums': ['96ca72f1b886d148241348783498194c577fa30a8faac108586b14f1ba4473ac'],
     }),
-    ('etils', '1.6.0', {
-        'checksums': ['c635fbd02a79fed4ad76825d31306b581d22b40671721daa8bc279cf6333e48a'],
+    ('etils', '1.10.0', {
+        'checksums': ['4eaa9d7248fd4eeb75e44d47ca29875a5ccea044cc14a17435794bf8ac116a05'],
     }),
     (name, version, {
         'patches': [('ml_dtypes-0.3.2_EigenAvx512.patch', 1)],
         'checksums': [
-            {'ml_dtypes-0.3.2.tar.gz': '533059bc5f1764fac071ef54598db358c167c51a718f68f5bb55e3dee79d2967'},
+            {'ml_dtypes-0.5.0.tar.gz': '3e7d3a380fe73a63c884f06136f8baa7a5249cc8e9fdec677997dd78549f8128'},
             {'ml_dtypes-0.3.2_EigenAvx512.patch': '197b05b0b7f611749824369f026099f6a172f9e8eab6ebb6504a16573746c892'},
         ],
     }),

@ThomasHoffmann77 ThomasHoffmann77 changed the title from "{ai}[gfbf/2024a] jax v0.4.35 w/ CUDA 12.6.0" to "{ai}[gfbf/2024a] jax v0.4.35 w/ CUDA 12.6.0 WIP" Nov 28, 2024
@ThomasHoffmann77 ThomasHoffmann77 marked this pull request as draft November 28, 2024 13:43
@ThomasHoffmann77 ThomasHoffmann77 changed the title from "{ai}[gfbf/2024a] jax v0.4.35 w/ CUDA 12.6.0 WIP" to "{ai}[gfbf/2024a] jax v0.4.35, ml_dtypes v0.5.0 w/ CUDA 12.6.0 WIP" Nov 28, 2024
@github-actions github-actions bot added the change label Dec 2, 2024
@ThomasHoffmann77 ThomasHoffmann77 changed the title from "{ai}[gfbf/2024a] jax v0.4.35, ml_dtypes v0.5.0 w/ CUDA 12.6.0 WIP" to "{ai}[gfbf/2024a] jax v0.4.34, ml_dtypes v0.5.0 w/ CUDA 12.6.0 WIP" Dec 20, 2024
add versionsuffix to Clang dependency
update builddep pybind11 to v2.13.6
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant