diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch
new file mode 100644
index 00000000000..b690fc529d5
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch
@@ -0,0 +1,25 @@
+Fix a use-after-free leading to random failures in nn/test_embedding
+on e.g. POWER platforms where FBGEMM isn't used
+
+From https://github.com/pytorch/pytorch/pull/84750
+
+Author: Alexander Grund (TU Dresden)
+
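+The root cause, as a minimal hypothetical sketch (illustration only, not part
+of the upstream change): to() returns a temporary Tensor when it actually has
+to convert, and a data_ptr() taken from that temporary dangles as soon as the
+full expression ends.
+
+  #include <ATen/ATen.h>
+
+  const float* broken(const at::Tensor& w) {
+    // BAD: for Half input, w.to(Float) creates a temporary Tensor; the
+    // returned pointer refers to its storage, which is freed when the
+    // temporary is destroyed at the end of this statement.
+    return w.to(at::ScalarType::Float).data_ptr<float>();
+  }
+
+  float first_element(const at::Tensor& w) {
+    // GOOD (the approach used below): binding the temporary to a const
+    // Tensor& extends its lifetime to the end of the enclosing scope, so
+    // pointers taken from it stay valid while float_weight is alive.
+    const at::Tensor& float_weight = w.scalar_type() == at::ScalarType::Half
+        ? w.to(at::ScalarType::Float)
+        : w;
+    return float_weight.data_ptr<float>()[0];
+  }
+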
+diff --git a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
+index 224a66f8abf..f4d018007bf 100644
+--- a/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
++++ b/aten/src/ATen/native/quantized/cpu/qembeddingbag_prepack.cpp
+@@ -252,9 +252,10 @@ Tensor& qembeddingbag_byte_prepack_out(Tensor& output, const Tensor& weight) {
+ }
+ 
+ #else
+-  const auto weight_data = weight_contig->scalar_type() == at::ScalarType::Half
+-      ? weight_contig->to(at::ScalarType::Float).data_ptr<float>()
+-      : weight_contig->data_ptr<float>();
++  const Tensor& float_weight = weight_contig->scalar_type() == at::ScalarType::Half
++      ? weight_contig->to(at::ScalarType::Float)
++      : *weight_contig;
++  const auto weight_data = float_weight.data_ptr<float>();
+   constexpr float kEpsilon = 1e-8f;
+   for (auto row : c10::irange(embedding_rows)) {
+     const float* input_row = weight_data + row * embedding_cols;
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0-foss-2022a-CUDA-11.7.0.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0-foss-2022a-CUDA-11.7.0.eb
index e6bc2880ce8..59244119da2 100644
--- a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0-foss-2022a-CUDA-11.7.0.eb
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0-foss-2022a-CUDA-11.7.0.eb
@@ -8,42 +8,100 @@ PyTorch is a deep learning framework that puts Python first."""
 
 toolchain = {'name': 'foss', 'version': '2022a'}
 
-sources = [{
-    'filename': '%(name)s-%(version)s.tar.gz',
-    'git_config': {
-        'url': 'https://github.com/pytorch',
-        'repo_name': 'pytorch',
-        'tag': 'v%(version)s',
-        'recursive': True,
-    },
-}]
+source_urls = [GITHUB_RELEASE]
+sources = ['%(namelower)s-v%(version)s.tar.gz']
 
 patches = [
-    '%(name)s-1.7.0_avoid-nan-in-test-torch.patch',
-    '%(name)s-1.7.0_disable-dev-shm-test.patch',
-    '%(name)s-1.8.1_dont-use-gpu-ccc-in-test.patch',
-    '%(name)s-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
-    '%(name)s-1.10.0_fix-test-dataloader-fixed-affinity.patch',
-    '%(name)s-1.10.0_skip_cmake_rpath.patch',
-    '%(name)s-1.11.0_increase-distributed-test-timeout.patch',
-    '%(name)s-1.11.0_increase_c10d_gloo_timeout.patch',
-    '%(name)s-1.11.0_disable_failing_jit_cuda_fuser_tests.patch',
+    'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
+    'PyTorch-1.7.0_disable-dev-shm-test.patch',
+    'PyTorch-1.10.0_fix-kineto-crash.patch',
+    'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
+    'PyTorch-1.10.0_fix-test-model_dump.patch',
+    'PyTorch-1.10.0_fix-vsx-vector-functions.patch',
+    'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch',
+    'PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch',
+    'PyTorch-1.11.0_fix-fsdp-fp16-test.patch',
+    'PyTorch-1.11.0_fix-test_utils.patch',
+    'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
+    'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
+    'PyTorch-1.11.0_install-vsx-vec-headers.patch',
+    'PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch',
+    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
+    'PyTorch-1.12.1_fix-cuda-gcc-version-check.patch',
+    'PyTorch-1.12.1_fix-skip-decorators.patch',
+    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
+    'PyTorch-1.12.1_fix-test_wishart_log_prob.patch',
+    'PyTorch-1.12.1_fix-TestCudaFuser.test_unary_ops.patch',
+    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
+    'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch',
+    'PyTorch-1.12.1_fix-vsx-vector-funcs.patch',
+    'PyTorch-1.12.1_fix-vsx-loadu.patch',
+    'PyTorch-1.12.1_increase-test-adadelta-tolerance.patch',
+    'PyTorch-1.12.1_increase-tolerance-test_ops.patch',
+    'PyTorch-1.12.1_no-cuda-stubs-rpath.patch',
+    'PyTorch-1.12.1_python-3.10-annotation-fix.patch',
+    'PyTorch-1.12.1_python-3.10-compat.patch',
+    'PyTorch-1.12.1_remove-flaky-test-in-testnn.patch',
+    'PyTorch-1.12.1_skip-ao-sparsity-test-without-fbgemm.patch',
+    'PyTorch-1.12.1_skip-failing-grad-test.patch',
+    'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch',
+]
 
 checksums = [
-    None,  # PyTorch-1.12.0.tar.gz
-    'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18',  # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
-    '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a',  # PyTorch-1.7.0_disable-dev-shm-test.patch
-    '89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6',  # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
-    # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
-    'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
-    # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
-    '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
-    'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448',  # PyTorch-1.10.0_skip_cmake_rpath.patch
-    # PyTorch-1.11.0_increase-distributed-test-timeout.patch
-    '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
-    # PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
-    '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953',
-    # PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch
-    'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51',
+    {'pytorch-v1.12.0.tar.gz': '46eff236370b759c427b03ff535c3597099043e8e467b8f81f9cd4b258a7a321'},
+    {'PyTorch-1.7.0_avoid-nan-in-test-torch.patch':
+     'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18'},
+    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
+    {'PyTorch-1.10.0_fix-kineto-crash.patch': 'dc467333b28162149af8f675929d8c6bf219f23230bfc0d39af02ba4f6f882eb'},
+    {'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch':
+     '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707'},
+    {'PyTorch-1.10.0_fix-test-model_dump.patch': '339148ae1a028cda6e750ac93fa38a599f66c7abe26586c9219f1a206ea14557'},
+    {'PyTorch-1.10.0_fix-vsx-vector-functions.patch':
+     '7bef5f96cb83b2d655d2f76dd7468a171d446f0b3e06da2232ec7f886484d312'},
+    {'PyTorch-1.10.0_skip-nnapi-test-without-qnnpack.patch':
+     '34ba476a7bcddec323bf9eca083cb4623d0f569d081aa3add3769c24f22849d2'},
+    {'PyTorch-1.11.0_fix-fp16-quantization-without-fbgemm.patch':
+     'cc526130b6446bbbf5f0f7372d3aeee3e7d4c4d6e471524dff028b430b152934'},
+    {'PyTorch-1.11.0_fix-fsdp-fp16-test.patch': 'bb1c4e6d6fd4b0cf57ff8b824c797331b533bb1ffc63f5db0bae3aee10c3dc13'},
+    {'PyTorch-1.11.0_fix-test_utils.patch': '4f7e25c4e2eb7094f92607df74488c6a4a35849fabf05fcf6c3655fa3f44a861'},
+    {'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch':
+     '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953'},
+    {'PyTorch-1.11.0_increase-distributed-test-timeout.patch':
+     '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f'},
+    {'PyTorch-1.11.0_install-vsx-vec-headers.patch':
+     'f2e6b9625733d9a471bb75e1ea20e28814cf1380b4f9089aa838ee35ddecf07d'},
+    {'PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch':
+     '090598592283e3fc46ee08a68b6a6afe07be41b26514afba51834408bf1c98ed'},
+    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
+     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
+    {'PyTorch-1.12.1_fix-cuda-gcc-version-check.patch':
+     'a650f4576f06c749f244cada52ff9c02499fa8f182019129488db3845e0756ab'},
+    {'PyTorch-1.12.1_fix-skip-decorators.patch': 'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c'},
+    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
+     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
+    {'PyTorch-1.12.1_fix-test_wishart_log_prob.patch':
+     'cf475ae6e6234b96c8d1bf917597c5176c94b3ccd940b72f2e1cd0c979580f45'},
+    {'PyTorch-1.12.1_fix-TestCudaFuser.test_unary_ops.patch':
+     '8e6e844c6b0541e0c8115911ee1a9d548613254b36dfbdada202fd723fc26aa2'},
+    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
+    {'PyTorch-1.12.1_fix-use-after-free-in-tensorpipe-agent.patch':
+     '0bd7e88b92c4c6f0fecf01746009858ba19f2df68b10b88c41485328a531875d'},
+    {'PyTorch-1.12.1_fix-vsx-vector-funcs.patch': 'caccbf60f62eac313896c1eaec78b08f5d0fdfcb907079087490bb13d1561aa2'},
+    {'PyTorch-1.12.1_fix-vsx-loadu.patch': '8bfe3c94ada1dd1f7974a1261a8b576fb7ae944050fa1c7830fca033831123b2'},
+    {'PyTorch-1.12.1_increase-test-adadelta-tolerance.patch':
+     '944ed1af5ad4bbe20cbb042764a88dad1eef6cd33218617cf3d4cd90c6764695'},
+    {'PyTorch-1.12.1_increase-tolerance-test_ops.patch':
+     '1c1fa520801e2ee5faf56a3d6dc96321e7c11664fd16bffd7c6ee437e68357fb'},
+    {'PyTorch-1.12.1_no-cuda-stubs-rpath.patch': '2905826ca713752b47c84e4ec8b177c90cbd91fca498ba2ba546f495c4cf70a6'},
+    {'PyTorch-1.12.1_python-3.10-annotation-fix.patch':
+     '11e168fd429d9e156fc79dd806b08125f3640651ad9998abd810446b2ed0c2d7'},
+    {'PyTorch-1.12.1_python-3.10-compat.patch': '81402420a878b40f824778f0333fbec6504325a6a1b06a22749c4cac3eaccf67'},
+    {'PyTorch-1.12.1_remove-flaky-test-in-testnn.patch':
+     'e81b678e354dd137c0d6d974605cdedbf672096fdbdf567c347bc2fbfc73471d'},
+    {'PyTorch-1.12.1_skip-ao-sparsity-test-without-fbgemm.patch':
+     'edd464ec8c37b44c07a72008d732604f6837f2dd61c7810c391a86ba4945ca39'},
+    {'PyTorch-1.12.1_skip-failing-grad-test.patch':
+     '1c89e7e67287fe6b9a95480a4178d3653b94d0ab2fe68edf227606c8ae548fdc'},
+    {'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch':
+     '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d'},
 ]
 
 builddependencies = [
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch
new file mode 100644
index 00000000000..644c5d87545
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-1.12.0_fix-EmbeddingBag-without-fbgemm.patch
@@ -0,0 +1,48 @@
+There is a bug in the fallback path used when FBGEMM isn't available (e.g. on POWER)
+which leads to a race condition:
+The output is processed in chunks by different threads, but each thread converts
+the full buffer instead of only its own chunk.
+This a) duplicates the work and b) might write incomplete/wrong data to the output.
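+
+To make the race concrete, here is a simplified hypothetical sketch of the
+parallel loop (names follow the patched code; this is not the verbatim source):
+
+  #include <ATen/ATen.h>
+  #include <ATen/Parallel.h>
+  #include <c10/util/irange.h>
+
+  void fp32_to_fp16_rows(at::Half* output_data, const float* output_data_fp32,
+                         int64_t output_size, int64_t ddim) {
+    at::parallel_for(0, output_size, 1, [&](int64_t start_idx, int64_t end_idx) {
+      // ... each thread computes output_data_fp32 for rows [start_idx, end_idx) ...
+      // Bug: looping over c10::irange(output_size) here made every thread
+      // convert all rows, including rows other threads were still filling.
+      // Fix: each thread converts only its own chunk.
+      for (const auto i : c10::irange(start_idx, end_idx)) {
+        for (const auto d : c10::irange(ddim)) {
+          (output_data + i * ddim)[d] =
+              static_cast<at::Half>((output_data_fp32 + i * ddim)[d]);
+        }
+      }
+    });
+  }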
+
+Found in failing test_embedding_bag_half_cpu_* of nn/test_embedding:
+ERROR: test_embedding_bag_half_cpu_int32_int32 (__main__.TestEmbeddingNNDeviceTypeCPU)
+----------------------------------------------------------------------
+Traceback (most recent call last):
+  File "/dev/shm/s3248973-EasyBuild/PyTorch/1.13.1/foss-2022a/pytorch-v1.13.1/test/nn/test_embedding.py", line 936, in _test_EmbeddingBag_vs_Embedding
+    self.assertEqual(output, ref_output, atol=dtype2prec_DONTUSE[wdtype], rtol=0)
+  File "/tmp/eb-tmp-2022a/lib/python3.10/site-packages/torch/testing/_internal/common_utils.py", line 2470, in assertEqual
+    assert_equal(
+  File "/tmp/eb-tmp-2022a/lib/python3.10/site-packages/torch/testing/_comparison.py", line 1093, in assert_equal
+    raise error_metas[0].to_error(msg)
+AssertionError: Tensor-likes are not close!
+
+Mismatched elements: 1 / 4 (25.0%)
+Greatest absolute difference: 1.18359375 at index (1, 1) (up to 0.01 allowed)
+Greatest relative difference: 1.0 at index (1, 1) (up to 0 allowed)
+
+
+Introduced by https://github.com/pytorch/pytorch/pull/74844
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/aten/src/ATen/native/EmbeddingBag.cpp b/aten/src/ATen/native/EmbeddingBag.cpp
+index 6d8cea26f52..604ea16bace 100644
+--- a/aten/src/ATen/native/EmbeddingBag.cpp
++++ b/aten/src/ATen/native/EmbeddingBag.cpp
+@@ -246,7 +246,7 @@ index_select_add(const Tensor &select_indices,
+           /*scale_bias=*/nullptr,
+           /*normalize_by_lengths=*/false,
+           /*out=*/output_data_fp32 + start_idx * ddim);
+-      for (const auto i : c10::irange(output_size)) {
++      for (const auto i : c10::irange(start_idx, end_idx)) {
+         // Convert FP32 intermediate buffer result back to FP16 for output dtype
+         for (const auto d : c10::irange(ddim)) {
+           (output_data + i * ddim)[d] = static_cast<at::Half>((output_data_fp32 + ddim * i)[d]);
+@@ -590,7 +590,7 @@ index_select_scale_add(const Tensor &select_indices,
+           /*scale_bias=*/nullptr,
+           /*normalize_by_lengths=*/false,
+           /*out=*/output_data_fp32 + start_idx * ddim);
+-      for (const auto i : c10::irange(output_size)) {
++      for (const auto i : c10::irange(start_idx, end_idx)) {
+         // Convert FP32 intermediate buffer result back to FP16 for output dtype
+         for (const auto d : c10::irange(ddim)) {
+           (output_data + i * ddim)[d] = static_cast<at::Half>((output_data_fp32 + ddim * i)[d]);