Skip to content

Commit

Permalink
Merge pull request lammps#4322 from stanmoore1/kk_update_4.4.0
Browse files Browse the repository at this point in the history
Update Kokkos library in LAMMPS to v4.4.1
  • Loading branch information
akohlmey authored Sep 26, 2024
2 parents b95f7c2 + 80832ce commit ec2fece
Show file tree
Hide file tree
Showing 289 changed files with 14,900 additions and 10,115 deletions.
35 changes: 30 additions & 5 deletions cmake/Modules/Packages/KOKKOS.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,24 @@ endif()
########################################################################
# consistency checks and Kokkos options/settings required by LAMMPS
if(Kokkos_ENABLE_CUDA)
message(STATUS "KOKKOS: Enabling CUDA LAMBDA function support")
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE)
option(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC "CUDA asynchronous malloc support" OFF)
mark_as_advanced(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
if(Kokkos_ENABLE_IMPL_CUDA_MALLOC_ASYNC)
message(STATUS "KOKKOS: CUDA malloc async support enabled")
else()
message(STATUS "KOKKOS: CUDA malloc async support disabled")
endif()
endif()
if(Kokkos_ENABLE_HIP)
option(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS "Enable multiple kernel instantiations with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS)
option(Kokkos_ENABLE_ROCTHRUST "Use RoCThrust library" ON)
mark_as_advanced(Kokkos_ENABLE_ROCTHRUST)

if(Kokkos_ARCH_AMD_GFX942 OR Kokkos_ARCH_AMD_GFX940)
option(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY "Enable unified memory with HIP" ON)
mark_as_advanced(Kokkos_ENABLE_IMPL_HIP_UNIFIED_MEMORY)
endif()
endif()
# Adding OpenMP compiler flags without the checks done for
# BUILD_OMP can result in compile failures. Enforce consistency.
Expand All @@ -18,6 +34,15 @@ if(Kokkos_ENABLE_OPENMP)
message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP")
endif()
endif()

if(Kokkos_ENABLE_SERIAL)
if(NOT (Kokkos_ENABLE_OPENMP OR Kokkos_ENABLE_THREADS OR
Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL
OR Kokkos_ENABLE_OPENMPTARGET))
option(Kokkos_ENABLE_ATOMICS_BYPASS "Disable atomics for Kokkos Serial Backend" ON)
mark_as_advanced(Kokkos_ENABLE_ATOMICS_BYPASS)
endif()
endif()
########################################################################

option(EXTERNAL_KOKKOS "Build against external kokkos library" OFF)
Expand Down Expand Up @@ -45,8 +70,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.3.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "243de871b3dc2cf3990c1c404032df83" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/4.4.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "de6ee80d00b6212b02bfb7f1e71a8392" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
Expand All @@ -71,7 +96,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 4.3.01 REQUIRED CONFIG)
find_package(Kokkos 4.4.01 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
Expand Down
93 changes: 92 additions & 1 deletion lib/kokkos/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,103 @@
# CHANGELOG

## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.4.01)

### Features:
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)

### Backend and Architecture Enhancements:

#### CUDA:
* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823)

### Bug Fixes
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
* Fix implicit copy assignment operators in few AVX2 masks being deleted [#7296](https://github.com/kokkos/kokkos/pull/7296)

## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)

### Features:
* Add `Kokkos::View` conversions from and to [`std::mdspan`](https://en.cppreference.com/w/cpp/container/mdspan) [\#6830](https://github.com/kokkos/kokkos/pull/6830) [\#7069](https://github.com/kokkos/kokkos/pull/7069)

### Backend and Architecture Enhancements:

#### CUDA:
* `nvcc_wrapper`: Adding ability to process `--disable-warnings` flag [\#6936](https://github.com/kokkos/kokkos/issues/6936)
* Use recommended/max team size functions in Cuda ParallelFor and Reduce constructors [\#6891](https://github.com/kokkos/kokkos/issues/6891)
* Improve compile-times when building with `Kokkos_ENABLE_DEBUG_BOUNDS_CHECK` in Cuda [\#7013](https://github.com/kokkos/kokkos/pull/7013)

#### HIP:
* Use HIP builtin atomics [\#6882](https://github.com/kokkos/kokkos/pull/6882) [\#7000](https://github.com/kokkos/kokkos/pull/7000)
* Enable user-specified compiler and linker flags for AMD GPUs [\#7127](https://github.com/kokkos/kokkos/pull/7127)

#### SYCL:
* Add support for Graphs [\#6912](https://github.com/kokkos/kokkos/pull/6912)
* Fix multi-GPU support [\#6887](https://github.com/kokkos/kokkos/pull/6887)
* Improve performance of reduction and scan operations [\#6562](https://github.com/kokkos/kokkos/pull/6562), [\#6750](https://github.com/kokkos/kokkos/pull/6750)
* Fix lock for guarding scratch space in `TeamPolicy` `parallel_reduce` [\#6988](https://github.com/kokkos/kokkos/pull/6988)
* Include submission command queue property information into `SYCL::print_configuration()` [\#7004](https://github.com/kokkos/kokkos/pull/7004)

#### OpenACC:
* Make `TeamPolicy` `parallel_for` execute on the correct async queue [\#7012](https://github.com/kokkos/kokkos/pull/7012)

#### OpenMPTarget:
* Honor user requested loop ordering in `MDRange` policy [\#6925](https://github.com/kokkos/kokkos/pull/6925)
* Prevent data races by guarding the scratch space used in `parallel_scan` [\#6998](https://github.com/kokkos/kokkos/pull/6998)

#### HPX:
* Workaround issue with template argument deduction to support compilation with NVCC [\#7015](https://github.com/kokkos/kokkos/pull/7015)

### General Enhancements
* Improve performance of view copies in host parallel regions [\#6730](https://github.com/kokkos/kokkos/pull/6730)
* Harmonize convertibility rules of `Kokkos::RandomAccessIterator` with `View`s [\#6929](https://github.com/kokkos/kokkos/pull/6929)
* Add a check precondition non-overlapping ranges for the `adjacent_difference` algorithm in debug mode [\#6922](https://github.com/kokkos/kokkos/pull/6922)
* Add deduction guides for `TeamPolicy` [\#7030](https://github.com/kokkos/kokkos/pull/7030)
* SIMD: Allow flexible vector width for 32 bit types [\#6802](https://github.com/kokkos/kokkos/pull/6802)
* Updates for `Kokkos::Array`: add `kokkos_swap(Array<T, N>)` specialization [\#6943](https://github.com/kokkos/kokkos/pull/6943), add `Kokkos::to_array` [\#6375](https://github.com/kokkos/kokkos/pull/6375), make `Kokkos::Array` equality-comparable [\#7148](https://github.com/kokkos/kokkos/pull/7148)
* Structured binding support for `Kokkos::complex` [\#7040](https://github.com/kokkos/kokkos/pull/7040)

### Build System Changes
* Do not require OpenMP support for languages other than CXX [\#6965](https://github.com/kokkos/kokkos/pull/6965)
* Update Intel GPU architectures in Makefile [\#6895](https://github.com/kokkos/kokkos/pull/6895)
* Fix use of OpenMP with Cuda or HIP as compile language [\#6972](https://github.com/kokkos/kokkos/pull/6972)
* Define and enforce new minimum compiler versions for C++20 support [\#7128](https://github.com/kokkos/kokkos/pull/7128), [\#7123](https://github.com/kokkos/kokkos/pull/7123)
* Add nvidia Grace CPU architecture: `Kokkos_ARCH_ARMV9_GRACE` [\#7158](https://github.com/kokkos/kokkos/pull/7158)
* Fix Makefile.kokkos for Threads [\#6896](https://github.com/kokkos/kokkos/pull/6896)
* Remove support for NVHPC as CUDA device compiler [\#6987](https://github.com/kokkos/kokkos/pull/6987)
* Fix using CUDAToolkit for CMake 3.28.4 and higher [\#7062](https://github.com/kokkos/kokkos/pull/7062)

### Incompatibilities (i.e. breaking changes)
* Drop `Kokkos::Array` special treatment in `View`s [\#6906](https://github.com/kokkos/kokkos/pull/6906)
* Drop `Experimental::RawMemoryAllocationFailure` [\#7145](https://github.com/kokkos/kokkos/pull/7145)

### Deprecations
* Remove `Experimental::LayoutTiled` class template and deprecate `is_layouttiled` trait [\#6907](https://github.com/kokkos/kokkos/pull/6907)
* Deprecate `Kokkos::layout_iterate_type_selector` [\#7076](https://github.com/kokkos/kokkos/pull/7076)
* Deprecate specialization of `Kokkos::pair` for a single element [\#6947](https://github.com/kokkos/kokkos/pull/6947)
* Deprecate `deep_copy` of `UnorderedMap` of different size [\#6812](https://github.com/kokkos/kokkos/pull/6812)
* Deprecate trailing `Proxy` template argument of `Kokkos::Array` [\#6934](https://github.com/kokkos/kokkos/pull/6934)
* Deprecate implicit conversions of integers to `ChunkSize` [\#7151](https://github.com/kokkos/kokkos/pull/7151)
* Deprecate implicit conversions to execution spaces [\#7156](https://github.com/kokkos/kokkos/pull/7156)

### Bug Fixes
* Do not return a copy of the input functor in `Experimental::for_each` [\#6910](https://github.com/kokkos/kokkos/pull/6910)
* Fix `realloc` on views of non-default constructible element types [\#6993](https://github.com/kokkos/kokkos/pull/6993)
* Fix undefined behavior in `View` initialization or fill with zeros [\#7014](https://github.com/kokkos/kokkos/pull/7014)
* Fix `sort_by_key` on host execution spaces when building with NVCC [\#7059](https://github.com/kokkos/kokkos/pull/7059)
* Fix using shared libraries and -fvisibility=hidden [\#7065](https://github.com/kokkos/kokkos/pull/7065)
* Fix view reference counting when functor copy constructor throws in parallel dispatch [\#6289](https://github.com/kokkos/kokkos/pull/6289)
* Fix `initialize(InitializationSetting)` for handling `print_configuration` setting [\#7098](https://github.com/kokkos/kokkos/pull/7098)
* Thread safety fixes for the Serial and OpenMP backend [\#7080](https://github.com/kokkos/kokkos/pull/7080), [\#6151](https://github.com/kokkos/kokkos/pull/6151)

## [4.3.01](https://github.com/kokkos/kokkos/tree/4.3.01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.00...4.3.01)

### Backend and Architecture Enhancements:

#### HIP:
* MI300 support unified memory support [\#6877](https://github.com/kokkos/kokkos/pull/6877)
* MI300 support unified memory [\#6877](https://github.com/kokkos/kokkos/pull/6877)

### Bug Fixes
* Serial: Use the provided execution space instance in TeamPolicy [\#6951](https://github.com/kokkos/kokkos/pull/6951)
Expand Down
65 changes: 65 additions & 0 deletions lib/kokkos/CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
cff-version: 1.2.0
title: Kokkos
message: >-
If you use this software, please cite the overview paper
type: software
authors:
- name: The Kokkos authors
website: https://kokkos.org/community/team/
identifiers:
- type: url
website: https://kokkos.org/kokkos-core-wiki/citation.html
repository-code: 'https://github.com/kokkos/kokkos'
url: 'https://kokkos.org/'
license: Apache-2.0
preferred-citation:
type: article
authors:
- given-names: Christian R.
family-names: Trott
- given-names: Damien
family-names: Lebrun-Grandié
- given-names: Daniel
family-names: Arndt
- family-names: Ciesko
given-names: Jan
- given-names: Vinh
family-names: Dang
- family-names: Ellingwood
given-names: Nathan
- given-names: Rahulkumar
family-names: Gayatri
- given-names: Evan
family-names: Harvey
- given-names: Daisy S.
family-names: Hollman
- given-names: Dan
family-names: Ibanez
- given-names: Nevin
family-names: Liber
- given-names: Jonathan
family-names: Madsen
- given-names: Jeff
family-names: Miles
- given-names: David
family-names: Poliakoff
- given-names: Amy
family-names: Powell
- given-names: Sivasankaran
family-names: Rajamanickam
- given-names: Mikael
family-names: Simberg
- given-names: Dan
family-names: Sunderland
- given-names: Bruno
family-names: Turcksin
- given-names: Jeremiah
family-names: Wilke
doi: 10.1109/TPDS.2021.3097283
journal: IEEE Transactions on Parallel and Distributed Systems
start: 805
end: 817
title: "Kokkos 3: Programming Model Extensions for the Exascale Era"
volume: 33
issue: 4
year: 2022
2 changes: 1 addition & 1 deletion lib/kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ ENDIF()


set(Kokkos_VERSION_MAJOR 4)
set(Kokkos_VERSION_MINOR 3)
set(Kokkos_VERSION_MINOR 4)
set(Kokkos_VERSION_PATCH 1)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
Expand Down
62 changes: 49 additions & 13 deletions lib/kokkos/Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ CXXFLAGS += $(SHFLAGS)
endif

KOKKOS_VERSION_MAJOR = 4
KOKKOS_VERSION_MINOR = 3
KOKKOS_VERSION_MINOR = 4
KOKKOS_VERSION_PATCH = 1
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)

Expand All @@ -21,11 +21,11 @@ KOKKOS_DEVICES ?= "OpenMP"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKL,SKX,ICL,ICX,SPR
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75,Ampere80,Ampere86,Ada89,Hopper90
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2,A64FX,ARMv9-Grace
# IBM: Power8,Power9
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100,AMD_GFX1103
# AMD-GPUS: AMD_GFX906,AMD_GFX908,AMD_GFX90A,AMD_GFX940,AMD_GFX942,AMD_GFX1030,AMD_GFX1100
# AMD-CPUS: AMDAVX,Zen,Zen2,Zen3
# Intel-GPUs: Gen9,Gen11,Gen12LP,DG1,XeHP,PVC
# Intel-GPUs: Intel_Gen,Intel_Gen9,Intel_Gen11,Intel_Gen12LP,Intel_DG1,Intel_XeHP,Intel_PVC
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
Expand All @@ -41,7 +41,7 @@ KOKKOS_STANDALONE_CMAKE ?= "no"

# Default settings specific options.
# Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr,disable_malloc_async
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
KOKKOS_CUDA_OPTIONS ?= "disable_malloc_async"

# Options: rdc
KOKKOS_HIP_OPTIONS ?= ""
Expand Down Expand Up @@ -328,12 +328,43 @@ KOKKOS_INTERNAL_USE_ARCH_ICL := $(call kokkos_has_string,$(KOKKOS_ARCH),ICL)
KOKKOS_INTERNAL_USE_ARCH_ICX := $(call kokkos_has_string,$(KOKKOS_ARCH),ICX)
KOKKOS_INTERNAL_USE_ARCH_SPR := $(call kokkos_has_string,$(KOKKOS_ARCH),SPR)

KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
# Traditionally, we supported, e.g., IntelGen9 instead of Intel_Gen9. The latter
# matches the CMake option but we also accept the former for backward-compatibility.
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen11)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen11)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen12LP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen12LP)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen9)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen9)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN9) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN11) \
+ $(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN12LP))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN_SET), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelGen)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_GEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_Gen)
endif
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelDG1)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_DG1 := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_DG1)
endif
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),IntelXeHP)
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP), 0)
KOKKOS_INTERNAL_USE_ARCH_INTEL_XEHP := $(call kokkos_has_string,$(KOKKOS_ARCH),Intel_XeHP)
endif
# Traditionally the architecture was called PVC instead of Intel_PVC. This
# version makes us accept IntelPVC and Intel_PVC as well.
KOKKOS_INTERNAL_USE_ARCH_INTEL_PVC := $(call kokkos_has_string,$(KOKKOS_ARCH),PVC)

# NVIDIA based.
Expand Down Expand Up @@ -394,7 +425,8 @@ KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_A64FX := $(call kokkos_has_string,$(KOKKOS_ARCH),A64FX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX) | bc))
KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv9-Grace)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2)+$(KOKKOS_INTERNAL_USE_ARCH_A64FX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE) | bc))

# IBM based.
KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(call kokkos_has_string,$(KOKKOS_ARCH),Power8)
Expand Down Expand Up @@ -433,7 +465,6 @@ KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 0)
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100 := $(call kokkos_has_string,$(KOKKOS_ARCH),NAVI1100)
endif
KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103 := $(call kokkos_has_string,$(KOKKOS_ARCH),AMD_GFX1103)

# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_SNB) + $(KOKKOS_INTERNAL_USE_ARCH_AMDAVX))
Expand Down Expand Up @@ -758,6 +789,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_A64FX), 1)
endif
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV9_GRACE), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARMV9_GRACE")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_ARM_NEON")

KOKKOS_CXXFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
KOKKOS_LDFLAGS += -mcpu=neoverse-v2 -msve-vector-bits=128
endif

ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ZEN), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_ZEN")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AVX2")
Expand Down Expand Up @@ -1119,11 +1158,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1100), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1100
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AMD_GFX1103), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GFX1103")
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ARCH_AMD_GPU")
KOKKOS_INTERNAL_HIP_ARCH_FLAG := --offload-arch=gfx1103
endif


ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1)
Expand Down Expand Up @@ -1216,6 +1250,8 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_BUNDLED_MDSPAN), 0)
endif
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_MDSPAN")

tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY")

KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)

ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
Expand Down
Loading

0 comments on commit ec2fece

Please sign in to comment.