From 1fc885b35dacdeb2feef4af207a2daa2502bae08 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Thu, 13 Jun 2024 18:49:31 +0200 Subject: [PATCH] fix build of OpenBLAS 0.3.24 on A64FX --- .../o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb | 5 +- .../OpenBLAS/OpenBLAS-0.3.24_fix-A64FX.patch | 138 ++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-A64FX.patch diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb index 4835813e357..8d2315fbe6e 100644 --- a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24-GCC-13.2.0.eb @@ -20,6 +20,7 @@ patches = [ 'OpenBLAS-0.3.21_fix-order-vectorization.patch', 'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch', 'OpenBLAS-0.3.24_fix-czasum.patch', + 'OpenBLAS-0.3.24_fix-A64FX.patch', ] checksums = [ {'v0.3.24.tar.gz': 'ceadc5065da97bd92404cac7254da66cc6eb192679cf1002098688978d4d5132'}, @@ -33,8 +34,8 @@ checksums = [ '08af834e5d60441fd35c128758ed9c092ba6887c829e0471ecd489079539047d'}, {'OpenBLAS-0.3.23_disable-xDRGES-LAPACK-test.patch': 'ab7e0af05f9b2a2ced32f3875e1e3767d9c3531a455421a38f7324350178a0ff'}, - {'OpenBLAS-0.3.24_fix-czasum.patch': - '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'}, + {'OpenBLAS-0.3.24_fix-czasum.patch': '8132b87c519fb08caa3bd7291fe8a1d0e1afe6fcb667d16f3020b46122afe20c'}, + {'OpenBLAS-0.3.24_fix-A64FX.patch': '3712e8c3f0024c7bb327958779c388ad0234ad6d58b7b118e605256ec089964c'}, ] builddependencies = [ diff --git a/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-A64FX.patch b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-A64FX.patch new file mode 100644 index 00000000000..ff4a16dea8d --- /dev/null +++ b/easybuild/easyconfigs/o/OpenBLAS/OpenBLAS-0.3.24_fix-A64FX.patch @@ -0,0 +1,138 @@ +fix installation of OpenBLAS 0.3.24 on Arm A64FX, +see https://github.com/OpenMathLib/OpenBLAS/pull/4258 + https://github.com/OpenMathLib/OpenBLAS/issues/4257 +diff --git a/kernel/arm64/KERNEL.A64FX b/kernel/arm64/KERNEL.A64FX +index bd25f7cd8a..ccbce27e1b 100644 +--- a/kernel/arm64/KERNEL.A64FX ++++ b/kernel/arm64/KERNEL.A64FX +@@ -57,7 +57,7 @@ CAMAXKERNEL = zamax.S + ZAMAXKERNEL = zamax.S + + SAXPYKERNEL = axpy.S +-DAXPYKERNEL = axpy.S ++DAXPYKERNEL = daxpy_thunderx2t99.S + CAXPYKERNEL = zaxpy.S + ZAXPYKERNEL = zaxpy.S + +@@ -81,45 +81,35 @@ DGEMVTKERNEL = gemv_t.S + CGEMVTKERNEL = zgemv_t.S + ZGEMVTKERNEL = zgemv_t.S + +- +-SASUMKERNEL = asum.S +-DASUMKERNEL = asum.S +-CASUMKERNEL = casum.S +-ZASUMKERNEL = zasum.S +- +-SCOPYKERNEL = copy.S +-DCOPYKERNEL = copy.S +-CCOPYKERNEL = copy.S +-ZCOPYKERNEL = copy.S +- +-SSWAPKERNEL = swap.S +-DSWAPKERNEL = swap.S +-CSWAPKERNEL = swap.S +-ZSWAPKERNEL = swap.S +- +-ISAMAXKERNEL = iamax.S +-IDAMAXKERNEL = iamax.S +-ICAMAXKERNEL = izamax.S +-IZAMAXKERNEL = izamax.S +- +-SNRM2KERNEL = nrm2.S +-DNRM2KERNEL = nrm2.S +-CNRM2KERNEL = znrm2.S +-ZNRM2KERNEL = znrm2.S +- +-DDOTKERNEL = dot.S +-ifneq ($(C_COMPILER), PGI) +-SDOTKERNEL = ../generic/dot.c +-else +-SDOTKERNEL = dot.S +-endif +-ifneq ($(C_COMPILER), PGI) +-CDOTKERNEL = zdot.S +-ZDOTKERNEL = zdot.S +-else +-CDOTKERNEL = ../arm/zdot.c +-ZDOTKERNEL = ../arm/zdot.c +-endif ++SASUMKERNEL = sasum_thunderx2t99.c ++DASUMKERNEL = dasum_thunderx2t99.c ++CASUMKERNEL = casum_thunderx2t99.c ++ZASUMKERNEL = zasum_thunderx2t99.c ++ ++SCOPYKERNEL = copy_thunderx2t99.c ++DCOPYKERNEL = copy_thunderx2t99.c ++CCOPYKERNEL = copy_thunderx2t99.c ++ZCOPYKERNEL = copy_thunderx2t99.c ++ ++SSWAPKERNEL = swap_thunderx2t99.S ++DSWAPKERNEL = swap_thunderx2t99.S ++CSWAPKERNEL = swap_thunderx2t99.S ++ZSWAPKERNEL = swap_thunderx2t99.S ++ ++ISAMAXKERNEL = iamax_thunderx2t99.c ++IDAMAXKERNEL = iamax_thunderx2t99.c ++ICAMAXKERNEL = izamax_thunderx2t99.c ++IZAMAXKERNEL = izamax_thunderx2t99.c ++ ++SNRM2KERNEL = scnrm2_thunderx2t99.c ++DNRM2KERNEL = dznrm2_thunderx2t99.c ++CNRM2KERNEL = scnrm2_thunderx2t99.c ++ZNRM2KERNEL = dznrm2_thunderx2t99.c ++ ++DDOTKERNEL = dot.c ++SDOTKERNEL = dot.c ++CDOTKERNEL = zdot_thunderx2t99.c ++ZDOTKERNEL = zdot_thunderx2t99.c + DSDOTKERNEL = dot.S + + DGEMM_BETA = dgemm_beta.S +@@ -128,10 +118,10 @@ SGEMM_BETA = sgemm_beta.S + SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S + STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S + +-SGEMMINCOPY = sgemm_ncopy_sve_v1.c +-SGEMMITCOPY = sgemm_tcopy_sve_v1.c +-SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S +-SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S ++SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c ++SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c ++SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S ++SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S + + SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) + SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +@@ -149,8 +139,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c + DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S + DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S + +-DGEMMINCOPY = dgemm_ncopy_sve_v1.c +-DGEMMITCOPY = dgemm_tcopy_sve_v1.c ++DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c ++DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c + DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S + DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S + +@@ -170,8 +160,8 @@ DSYMMLCOPY_M = symm_lcopy_sve.c + CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S + CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S + +-CGEMMINCOPY = cgemm_ncopy_sve_v1.c +-CGEMMITCOPY = cgemm_tcopy_sve_v1.c ++CGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c ++CGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c + CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c + CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c + +@@ -194,8 +184,8 @@ CSYMMLCOPY_M = zsymm_lcopy_sve.c + ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S + ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S + +-ZGEMMINCOPY = zgemm_ncopy_sve_v1.c +-ZGEMMITCOPY = zgemm_tcopy_sve_v1.c ++ZGEMMINCOPY = gemm_ncopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c ++ZGEMMITCOPY = gemm_tcopy_complex_sve_v1x$(ZGEMM_UNROLL_N).c + ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c + ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c +