From 3361a129a659355d9038a380e561c0a7467826d1 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 21 Oct 2025 04:11:33 -0500 Subject: [PATCH 1/6] Changes required for embedded GPU LLVM IR Flang runtime --- flang-rt/include/flang-rt/runtime/lock.h | 13 +++++++ flang-rt/include/flang-rt/runtime/tools.h | 6 ++++ flang-rt/lib/runtime/CMakeLists.txt | 2 +- flang-rt/lib/runtime/assign.cpp | 1 - flang-rt/lib/runtime/descriptor.cpp | 2 ++ flang-rt/lib/runtime/edit-input.cpp | 4 +++ flang-rt/lib/runtime/environment.cpp | 2 ++ flang-rt/lib/runtime/file.cpp | 2 ++ flang-rt/lib/runtime/io-api-minimal.cpp | 8 +++++ flang-rt/lib/runtime/io-api.cpp | 2 ++ flang-rt/lib/runtime/stop.cpp | 36 +++++++++++++++++++ flang/include/flang/Runtime/extensions.h | 5 ++- flang/include/flang/Runtime/main.h | 4 +++ libcxx/src/string.cpp | 2 ++ llvm/runtimes/CMakeLists.txt | 4 +-- offload/plugins-nextgen/common/CMakeLists.txt | 5 ++- openmp/device/src/EmissaryFortrt.cpp | 18 ---------- 17 files changed, 90 insertions(+), 26 deletions(-) diff --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h index 7c88534245733..2924a9613d65c 100644 --- a/flang-rt/include/flang-rt/runtime/lock.h +++ b/flang-rt/include/flang-rt/runtime/lock.h @@ -23,7 +23,9 @@ #endif #if USE_PTHREADS +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include +#endif #elif defined(_WIN32) #include "flang/Common/windows-include.h" #else @@ -45,6 +47,7 @@ class Lock { RT_API_ATTRS void Drop() {} RT_API_ATTRS bool TakeIfNoDeadlock() { return true; } #elif USE_PTHREADS +#if not defined(__AMDGPU__) && not defined(__NVPTX__) Lock() { pthread_mutex_init(&mutex_, nullptr); } ~Lock() { pthread_mutex_destroy(&mutex_); } void Take() { @@ -68,6 +71,14 @@ class Lock { isBusy_ = false; pthread_mutex_unlock(&mutex_); } +#else + RT_API_ATTRS void Take(){} + RT_API_ATTRS bool TakeIfNoDeadlock() {return true;} + RT_API_ATTRS bool Try() {return true;} + RT_API_ATTRS void Drop() {} + Lock() {} + 
~Lock() {} +#endif #elif defined(_WIN32) Lock() { InitializeCriticalSection(&cs_); } ~Lock() { DeleteCriticalSection(&cs_); } @@ -91,9 +102,11 @@ class Lock { #if RT_USE_PSEUDO_FILE_UNIT // No state. #elif USE_PTHREADS +#if not defined(__AMDGPU__) && not defined(__NVPTX__) pthread_mutex_t mutex_{}; volatile bool isBusy_{false}; volatile pthread_t holder_; +#endif #elif defined(_WIN32) CRITICAL_SECTION cs_; #else diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h index 1939c4d907be4..c63ac1c0cc404 100644 --- a/flang-rt/include/flang-rt/runtime/tools.h +++ b/flang-rt/include/flang-rt/runtime/tools.h @@ -42,6 +42,12 @@ #define RT_USE_PSEUDO_FILE_UNIT 1 #endif +#if defined(__AMDGPU__) && not defined(__NVPTX__) +// Use the pseudo lock and pseudo file unit implementations +// for the device. +#define RT_USE_PSEUDO_LOCK 1 +#define RT_USE_PSEUDO_FILE_UNIT 1 +#endif namespace Fortran::runtime { class Terminator; diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index ef0f812eaca00..9d1396f393de8 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -178,7 +178,7 @@ else () endif () if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx") - set(sources ${gpu_sources}) + set(sources ${supported_sources} ${gpu_sources}) elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") set(sources ${supported_sources}) else () diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index 303ec79de240c..89e4260679225 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/assign.h" -#include "flang/Runtime/stop.h" #include "flang-rt/runtime/assign-impl.h" #include "flang-rt/runtime/derived.h" #include "flang-rt/runtime/descriptor.h" diff --git a/flang-rt/lib/runtime/descriptor.cpp 
b/flang-rt/lib/runtime/descriptor.cpp index c95da0a5371e5..6dda2b0ade0a2 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -8,7 +8,9 @@ #include "flang-rt/runtime/descriptor.h" #include "ISO_Fortran_util.h" +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include "memory.h" +#endif #include "flang-rt/runtime/allocator-registry.h" #include "flang-rt/runtime/derived.h" #include "flang-rt/runtime/stat.h" diff --git a/flang-rt/lib/runtime/edit-input.cpp b/flang-rt/lib/runtime/edit-input.cpp index 436fc3894d902..765e5e15d3cdc 100644 --- a/flang-rt/lib/runtime/edit-input.cpp +++ b/flang-rt/lib/runtime/edit-input.cpp @@ -569,7 +569,11 @@ static RT_API_ATTRS void RaiseFPExceptions( #ifdef feraisexcept // a macro in some environments; omit std:: #define RAISE feraiseexcept #else +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #define RAISE std::feraiseexcept +#else +#define RAISE +#endif #endif #endif // !defined(RT_DEVICE_COMPILATION) diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp index 97ac56236e799..8ce3bfcf68945 100644 --- a/flang-rt/lib/runtime/environment.cpp +++ b/flang-rt/lib/runtime/environment.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#if not defined (__AMDGPU__) && not defined(__NVPTX__) #include "flang-rt/runtime/environment.h" #include "environment-default-list.h" #include "memory.h" @@ -314,3 +315,4 @@ bool RTNAME(RegisterConfigureEnv)( } // extern "C" } // namespace Fortran::runtime +#endif diff --git a/flang-rt/lib/runtime/file.cpp b/flang-rt/lib/runtime/file.cpp index 8255ec8691886..d25a07e3c88ec 100644 --- a/flang-rt/lib/runtime/file.cpp +++ b/flang-rt/lib/runtime/file.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include "flang-rt/runtime/file.h" #include "flang-rt/runtime/memory.h" 
#include "flang-rt/runtime/tools.h" @@ -486,3 +487,4 @@ RT_API_ATTRS std::int64_t SizeInBytes(const char *path) { #endif // defined(RT_DEVICE_COMPILATION) } // namespace Fortran::runtime::io +#endif diff --git a/flang-rt/lib/runtime/io-api-minimal.cpp b/flang-rt/lib/runtime/io-api-minimal.cpp index f84b62d63baa1..374be065df7cc 100644 --- a/flang-rt/lib/runtime/io-api-minimal.cpp +++ b/flang-rt/lib/runtime/io-api-minimal.cpp @@ -23,6 +23,7 @@ namespace Fortran::runtime::io { RT_EXT_API_GROUP_BEGIN #endif +#if not defined(__AMDGPU__) && not defined(__NVPTX__) Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( @@ -33,6 +34,7 @@ enum Iostat IODEF(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); } +#endif template > inline RT_API_ATTRS bool FormattedScalarIntegerOutput( @@ -45,6 +47,7 @@ inline RT_API_ATTRS bool FormattedScalarIntegerOutput( } } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { return FormattedScalarIntegerOutput<1>(*cookie, n, "OutputInteger8"); } @@ -60,6 +63,7 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { return FormattedScalarIntegerOutput<8>(*cookie, n, "OutputInteger64"); } +#endif #ifdef __SIZEOF_INT128__ bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) { @@ -79,6 +83,7 @@ inline RT_API_ATTRS bool FormattedScalarRealOutput( } } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) bool IODEF(OutputReal32)(Cookie cookie, float x) { return FormattedScalarRealOutput<4>(*cookie, x, "OutputReal32"); } @@ -86,6 +91,7 @@ bool IODEF(OutputReal32)(Cookie cookie, float x) { bool IODEF(OutputReal64)(Cookie cookie, double x) { return FormattedScalarRealOutput<8>(*cookie, x, "OutputReal64"); } +#endif template ::BinaryFloatingPoint> @@ -110,6 +116,7 @@ inline 
RT_API_ATTRS bool FormattedScalarComplexOutput( return false; } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) { return FormattedScalarComplexOutput<4>(*cookie, re, im, "OutputComplex32"); } @@ -145,6 +152,7 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) { return false; } } +#endif } // namespace Fortran::runtime::io diff --git a/flang-rt/lib/runtime/io-api.cpp b/flang-rt/lib/runtime/io-api.cpp index 4c86fb9fdabf6..94830257f0cdf 100644 --- a/flang-rt/lib/runtime/io-api.cpp +++ b/flang-rt/lib/runtime/io-api.cpp @@ -199,12 +199,14 @@ RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, } } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) Cookie IODEF(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, formatDescriptor, unitNumber, sourceFile, sourceLine); } +#endif Cookie IODEF(BeginExternalFormattedInput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp index a12e9f14d90ec..488803d18e759 100644 --- a/flang-rt/lib/runtime/stop.cpp +++ b/flang-rt/lib/runtime/stop.cpp @@ -13,11 +13,15 @@ #include "flang-rt/runtime/file.h" #include "flang-rt/runtime/io-error.h" #include "flang-rt/runtime/terminator.h" +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include "flang/Runtime/main.h" +#endif #include #include #include +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include +#endif #ifdef HAVE_BACKTRACE #include BACKTRACE_HEADER @@ -26,6 +30,7 @@ extern "C" { [[maybe_unused]] static void DescribeIEEESignaledExceptions() { +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #ifdef fetestexcept // a macro in some environments; omit std:: auto excepts{fetestexcept(FE_ALL_EXCEPT)}; 
#else @@ -60,6 +65,7 @@ extern "C" { #endif std::fputc('\n', stderr); } +#endif } static void CloseAllExternalUnits(const char *why) { @@ -85,6 +91,22 @@ static void CloseAllExternalUnits(const char *why) { std::printf("\n"); } Fortran::runtime::DeviceTrap(); +#elif defined(__AMDGPU__) || defined(__NVPTX__) + if (Fortran::runtime::executionEnvironment.noStopMessage && code == 0) { + quiet = true; + } + if (!quiet) { + if (isErrorStop) { + std::fprintf(stderr, "Fortran ERROR STOP"); + } else { + std::fprintf(stderr, "Fortran STOP"); + } + if (code != EXIT_SUCCESS) { + std::fprintf(stderr, ": code %d\n", code); + } + std::fprintf(stderr, "\n"); + } + std::exit(code); #else CloseAllExternalUnits("STOP statement"); if (Fortran::runtime::executionEnvironment.noStopMessage && code == 0) { @@ -116,6 +138,20 @@ static void CloseAllExternalUnits(const char *why) { } } Fortran::runtime::DeviceTrap(); +#elif defined(__AMDGPU__) || defined(__NVPTX__) + if (!quiet) { + if (Fortran::runtime::executionEnvironment.noStopMessage && !isErrorStop) { + std::fprintf(stderr, "%s\n", code); + } else { + std::fprintf(stderr, + "Fortran %s: %s\n", isErrorStop ? "ERROR STOP" : "STOP", code); + } + } + if (isErrorStop) { + std::exit(EXIT_FAILURE); + } else { + std::exit(EXIT_SUCCESS); + } #else CloseAllExternalUnits("STOP statement"); if (!quiet) { diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index 9fd3e118a0f22..21e94f0048615 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -18,10 +18,13 @@ #define FORTRAN_PROCEDURE_NAME(name) name##_ -#ifdef _WIN32 +#if defined (_WIN32) || defined(__AMDGPU__) || defined(__NVPTX__) // UID and GID don't exist on Windows, these exist to avoid errors. 
typedef std::uint32_t uid_t; typedef std::uint32_t gid_t; +#elif defined(__AMDGPU__) || defined(__NVPTX__) +typedef std::uint32_t uid_t; +typedef std::uint32_t gid_t; #else #include "sys/types.h" //pid_t #endif diff --git a/flang/include/flang/Runtime/main.h b/flang/include/flang/Runtime/main.h index 40f7693221b6a..761841b738b45 100644 --- a/flang/include/flang/Runtime/main.h +++ b/flang/include/flang/Runtime/main.h @@ -11,11 +11,15 @@ #include "flang/Runtime/c-or-cpp.h" #include "flang/Runtime/entry-names.h" +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include +#endif struct EnvironmentDefaultList; +#if not defined(__AMDGPU__) && not defined(__NVPTX__) std::thread::id RTNAME(GetMainThreadId)(); +#endif FORTRAN_EXTERN_C_BEGIN void RTNAME(ProgramStart)( diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index 5028fc88fe46d..b9f8e01198c33 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -360,9 +360,11 @@ wstring to_wstring(unsigned long val) { return i_to_string(val); } wstring to_wstring(unsigned long long val) { return i_to_string(val); } #endif +#if not defined(__AMDGPU__) && not defined(__NVPTX__) string to_string(float val) { return as_string(snprintf, initial_string< string>()(), "%f", val); } string to_string(double val) { return as_string(snprintf, initial_string< string>()(), "%f", val); } string to_string(long double val) { return as_string(snprintf, initial_string< string>()(), "%Lf", val); } +#endif #if _LIBCPP_HAS_WIDE_CHARACTERS wstring to_wstring(float val) { return as_string(get_swprintf(), initial_string()(), L"%f", val); } diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index a7b8112e27e13..fe5b7f6bdcd4e 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -602,9 +602,9 @@ if(build_runtimes) # that all .mod files are also properly build. 
list(APPEND extra_deps "flang" "module_files") endif() - if (${LIBOMPTARGET_BUILD_DEVICE_FORTRT}) + #if (${LIBOMPTARGET_BUILD_DEVICE_FORTRT}) set(FORTRT_DEP FortranRuntime) - endif() + #endif() foreach(dep opt llvm-link llvm-extract clang llvm-offload-binary clang-nvlink-wrapper rocm-device-libs offload-arch ${HSA_DEP} ${AMDGPU_ARCH_DEP} ${FORTRT_DEP}) if(TARGET ${dep} AND OPENMP_ENABLE_LIBOMPTARGET) list(APPEND extra_deps ${dep}) diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 198e2add6b2d3..f5f63ccaf03c4 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -44,11 +44,10 @@ endif() # Include the RPC server from the `libc` project. include(FindLibcCommonUtils) target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities) -if(OFFLOAD_ENABLE_EMISSARY_APIS AND LIBOMPTARGET_BUILD_DEVICE_FORTRT) +#if(OFFLOAD_ENABLE_EMISSARY_APIS AND LIBOMPTARGET_BUILD_DEVICE_FORTRT) target_link_libraries(PluginCommon PRIVATE flang_rt.runtime -L${CMAKE_BINARY_DIR}/../../lib -L${CMAKE_INSTALL_PREFIX}/lib) -endif() - +#endif() if (OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT) add_library(PluginOmpt STATIC OMPT/OmptTracing.cpp OMPT/OmptProfiler.cpp) target_include_directories(PluginOmpt PUBLIC diff --git a/openmp/device/src/EmissaryFortrt.cpp b/openmp/device/src/EmissaryFortrt.cpp index 1e441213405c0..0ef535b328c38 100644 --- a/openmp/device/src/EmissaryFortrt.cpp +++ b/openmp/device/src/EmissaryFortrt.cpp @@ -119,23 +119,5 @@ bool _FortranAioOutputLogical(void *cookie, bool barg) { _PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAioOutputLogical_idx), _EXTRA_ARGS, cookie, barg); } -void _FortranAAbort() { - _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAAbort_idx), - _EXTRA_ARGS); - // When host service _FortranAAbort finishes, we must die from the device. 
- __builtin_trap(); -} -void _FortranAStopStatement(int32_t a1, bool a2, bool a3) { - _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAStopStatement_idx), - _EXTRA_ARGS, a1, a2, a3); - __builtin_trap(); -} -void _FortranAStopStatementText(char *errmsg, int64_t a1, bool a2, bool a3) { - errmsg[a1 - 1] = (char)0; - _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAStopStatementText_idx), - _EXTRA_ARGS, errmsg, a1, a2, a3); - __builtin_trap(); -} - } // end extern "C" #undef _EXTRA_ARGS From c10533f8b78faa68b4aba0e72643874195cb0e3a Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 22 Oct 2025 14:39:50 -0500 Subject: [PATCH 2/6] Use OpenMP emissary functions --- flang-rt/lib/runtime/stop.cpp | 36 +++++----------------------- openmp/device/src/EmissaryFortrt.cpp | 18 ++++++++++++++ 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp index 488803d18e759..702829d0b6ccb 100644 --- a/flang-rt/lib/runtime/stop.cpp +++ b/flang-rt/lib/runtime/stop.cpp @@ -73,6 +73,7 @@ static void CloseAllExternalUnits(const char *why) { Fortran::runtime::io::ExternalFileUnit::CloseAll(handler); } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)( int code, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -91,22 +92,6 @@ static void CloseAllExternalUnits(const char *why) { std::printf("\n"); } Fortran::runtime::DeviceTrap(); -#elif defined(__AMDGPU__) || defined(__NVPTX__) - if (Fortran::runtime::executionEnvironment.noStopMessage && code == 0) { - quiet = true; - } - if (!quiet) { - if (isErrorStop) { - std::fprintf(stderr, "Fortran ERROR STOP"); - } else { - std::fprintf(stderr, "Fortran STOP"); - } - if (code != EXIT_SUCCESS) { - std::fprintf(stderr, ": code %d\n", code); - } - std::fprintf(stderr, "\n"); - } - std::exit(code); #else CloseAllExternalUnits("STOP statement"); if 
(Fortran::runtime::executionEnvironment.noStopMessage && code == 0) { @@ -125,7 +110,9 @@ static void CloseAllExternalUnits(const char *why) { std::exit(code); #endif } +#endif +#if not defined(__AMDGPU__) && not defined(__NVPTX__) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatementText)( const char *code, std::size_t length, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -138,20 +125,6 @@ static void CloseAllExternalUnits(const char *why) { } } Fortran::runtime::DeviceTrap(); -#elif defined(__AMDGPU__) || defined(__NVPTX__) - if (!quiet) { - if (Fortran::runtime::executionEnvironment.noStopMessage && !isErrorStop) { - std::fprintf(stderr, "%s\n", code); - } else { - std::fprintf(stderr, - "Fortran %s: %s\n", isErrorStop ? "ERROR STOP" : "STOP", code); - } - } - if (isErrorStop) { - std::exit(EXIT_FAILURE); - } else { - std::exit(EXIT_SUCCESS); - } #else CloseAllExternalUnits("STOP statement"); if (!quiet) { @@ -172,6 +145,7 @@ static void CloseAllExternalUnits(const char *why) { } #endif } +#endif static bool StartPause() { if (Fortran::runtime::io::IsATerminal(0)) { @@ -255,12 +229,14 @@ static RT_NOINLINE_ATTR void PrintBacktrace() { #endif } +#if not defined(__AMDGPU__) && not defined(__NVPTX__) [[noreturn]] RT_OPTNONE_ATTR void RTNAME(Abort)() { #ifdef HAVE_BACKTRACE PrintBacktrace(); #endif std::abort(); } +#endif RT_OPTNONE_ATTR void FORTRAN_PROCEDURE_NAME(backtrace)() { PrintBacktrace(); } diff --git a/openmp/device/src/EmissaryFortrt.cpp b/openmp/device/src/EmissaryFortrt.cpp index 0ef535b328c38..1e441213405c0 100644 --- a/openmp/device/src/EmissaryFortrt.cpp +++ b/openmp/device/src/EmissaryFortrt.cpp @@ -119,5 +119,23 @@ bool _FortranAioOutputLogical(void *cookie, bool barg) { _PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAioOutputLogical_idx), _EXTRA_ARGS, cookie, barg); } +void _FortranAAbort() { + _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAAbort_idx), + _EXTRA_ARGS); + // When host service _FortranAAbort finishes, we must 
die from the device. + __builtin_trap(); +} +void _FortranAStopStatement(int32_t a1, bool a2, bool a3) { + _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAStopStatement_idx), + _EXTRA_ARGS, a1, a2, a3); + __builtin_trap(); +} +void _FortranAStopStatementText(char *errmsg, int64_t a1, bool a2, bool a3) { + errmsg[a1 - 1] = (char)0; + _emissary_exec(_PACK_EMIS_IDS(EMIS_ID_FORTRT, _FortranAStopStatementText_idx), + _EXTRA_ARGS, errmsg, a1, a2, a3); + __builtin_trap(); +} + } // end extern "C" #undef _EXTRA_ARGS From e919e6d47272349b4620c2f28dc1ee164b370316 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 23 Oct 2025 07:47:49 -0500 Subject: [PATCH 3/6] Fix CMake files --- llvm/runtimes/CMakeLists.txt | 4 ++-- offload/plugins-nextgen/common/CMakeLists.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index fe5b7f6bdcd4e..a7b8112e27e13 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -602,9 +602,9 @@ if(build_runtimes) # that all .mod files are also properly build. list(APPEND extra_deps "flang" "module_files") endif() - #if (${LIBOMPTARGET_BUILD_DEVICE_FORTRT}) + if (${LIBOMPTARGET_BUILD_DEVICE_FORTRT}) set(FORTRT_DEP FortranRuntime) - #endif() + endif() foreach(dep opt llvm-link llvm-extract clang llvm-offload-binary clang-nvlink-wrapper rocm-device-libs offload-arch ${HSA_DEP} ${AMDGPU_ARCH_DEP} ${FORTRT_DEP}) if(TARGET ${dep} AND OPENMP_ENABLE_LIBOMPTARGET) list(APPEND extra_deps ${dep}) diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index f5f63ccaf03c4..fad361d943e91 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -44,10 +44,10 @@ endif() # Include the RPC server from the `libc` project. 
include(FindLibcCommonUtils) target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities) -#if(OFFLOAD_ENABLE_EMISSARY_APIS AND LIBOMPTARGET_BUILD_DEVICE_FORTRT) +if(OFFLOAD_ENABLE_EMISSARY_APIS AND LIBOMPTARGET_BUILD_DEVICE_FORTRT) target_link_libraries(PluginCommon PRIVATE flang_rt.runtime -L${CMAKE_BINARY_DIR}/../../lib -L${CMAKE_INSTALL_PREFIX}/lib) -#endif() +endif() if (OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT) add_library(PluginOmpt STATIC OMPT/OmptTracing.cpp OMPT/OmptProfiler.cpp) target_include_directories(PluginOmpt PUBLIC From ca08f0a6b4c491088113e6411cc37910a21a39d8 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 28 Oct 2025 08:16:34 -0500 Subject: [PATCH 4/6] Restore generation of flang_rt.hostdevice --- flang-rt/CMakeLists.txt | 5 +++++ flang-rt/include/flang-rt/runtime/lock.h | 6 +++--- flang-rt/include/flang-rt/runtime/tools.h | 3 ++- flang-rt/lib/runtime/CMakeLists.txt | 6 +++++- flang-rt/lib/runtime/assign.cpp | 14 +++----------- flang-rt/lib/runtime/descriptor.cpp | 2 +- flang-rt/lib/runtime/edit-input.cpp | 2 +- flang-rt/lib/runtime/environment.cpp | 2 +- flang-rt/lib/runtime/file.cpp | 2 +- flang-rt/lib/runtime/io-api-minimal.cpp | 8 ++++---- flang-rt/lib/runtime/io-api.cpp | 2 +- flang-rt/lib/runtime/stop.cpp | 9 ++++----- flang/include/flang/Runtime/extensions.h | 4 ++-- flang/include/flang/Runtime/main.h | 4 ++-- 14 files changed, 35 insertions(+), 34 deletions(-) diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 17a51759e1a0e..b65378f012ecd 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -152,6 +152,11 @@ if (NOT "${FLANG_RT_LIBCXX_PROVIDER}" IN_LIST FLANG_RT_SUPPORTED_PROVIDERS) endif () option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." 
ON) +option(FLANG_RT_EMBED_GPU_LLVM_IR "Build Flang-RT as GPU LLVM IR library" ON) +if (FLANG_RT_EMBED_GPU_LLVM_IR) + add_compile_definitions(EMBED_FLANG_RT_GPU_LLVM_IR) +endif () + if (WIN32) # Windows DLL currently not implemented. set(FLANG_RT_ENABLE_SHARED OFF) diff --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h index 2924a9613d65c..0cffcf5e5deab 100644 --- a/flang-rt/include/flang-rt/runtime/lock.h +++ b/flang-rt/include/flang-rt/runtime/lock.h @@ -23,7 +23,7 @@ #endif #if USE_PTHREADS -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include #endif #elif defined(_WIN32) @@ -47,7 +47,7 @@ class Lock { RT_API_ATTRS void Drop() {} RT_API_ATTRS bool TakeIfNoDeadlock() { return true; } #elif USE_PTHREADS -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Lock() { pthread_mutex_init(&mutex_, nullptr); } ~Lock() { pthread_mutex_destroy(&mutex_); } void Take() { @@ -102,7 +102,7 @@ class Lock { #if RT_USE_PSEUDO_FILE_UNIT // No state. #elif USE_PTHREADS -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) pthread_mutex_t mutex_{}; volatile bool isBusy_{false}; volatile pthread_t holder_; diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h index c63ac1c0cc404..1c5554515037f 100644 --- a/flang-rt/include/flang-rt/runtime/tools.h +++ b/flang-rt/include/flang-rt/runtime/tools.h @@ -42,7 +42,8 @@ #define RT_USE_PSEUDO_FILE_UNIT 1 #endif -#if defined(__AMDGPU__) && not defined(__NVPTX__) + +#if (defined(__AMDGPU__) || defined(__NVPTX__)) && defined(EMBED_FLANG_RT_GPU_LLVM_IR) // Use the pseudo lock and pseudo file unit implementations // for the device. 
#define RT_USE_PSEUDO_LOCK 1 diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index 9d1396f393de8..ad46e8d847e5b 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -178,7 +178,11 @@ else () endif () if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx") - set(sources ${supported_sources} ${gpu_sources}) + if (FLANG_RT_EMBED_GPU_LLVM_IR) + set(sources ${supported_sources} ${gpu_sources}) + else () + set(sources ${gpu_sources}) + endif () elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") set(sources ${supported_sources}) else () diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index 89e4260679225..4aa0d003dd163 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -7,6 +7,9 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/assign.h" +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) +#include "flang/Runtime/stop.h" +#endif #include "flang-rt/runtime/assign-impl.h" #include "flang-rt/runtime/derived.h" #include "flang-rt/runtime/descriptor.h" @@ -861,17 +864,6 @@ void RTDEF(AssignPolymorphic)(Descriptor &to, const Descriptor &from, PolymorphicLHS); } -#if defined(OMP_OFFLOAD_BUILD) -// To support a recently added use of variant in the OpenMP offload build, -// added an abort wrapper which calls the flang-rt FortranAAbort. 
-// Avoids the following linker error: -// ld.lld: error: undefined symbol: abort -// >>> referenced by /tmp/device_aassign.amdgcn.gfx90a-34a7ed.img.lto.o:(std::__throw_bad_variant_access(char const*)) -extern "C" void abort(void) { - RTNAME(Abort)(); -} -#endif - RT_EXT_API_GROUP_END } // extern "C" } // namespace Fortran::runtime diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 6dda2b0ade0a2..147ff079fc2d9 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -8,7 +8,7 @@ #include "flang-rt/runtime/descriptor.h" #include "ISO_Fortran_util.h" -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include "memory.h" #endif #include "flang-rt/runtime/allocator-registry.h" diff --git a/flang-rt/lib/runtime/edit-input.cpp b/flang-rt/lib/runtime/edit-input.cpp index 765e5e15d3cdc..32ff42a4d0221 100644 --- a/flang-rt/lib/runtime/edit-input.cpp +++ b/flang-rt/lib/runtime/edit-input.cpp @@ -569,7 +569,7 @@ static RT_API_ATTRS void RaiseFPExceptions( #ifdef feraisexcept // a macro in some environments; omit std:: #define RAISE feraiseexcept #else -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined (EMBED_FLANG_RT_GPU_LLVM_IR) #define RAISE std::feraiseexcept #else #define RAISE diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp index 8ce3bfcf68945..17ac09f841a4f 100644 --- a/flang-rt/lib/runtime/environment.cpp +++ b/flang-rt/lib/runtime/environment.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#if not defined (__AMDGPU__) && not defined(__NVPTX__) +#if (not defined (__AMDGPU__) && not defined(__NVPTX__)) || not defined (EMBED_FLANG_RT_GPU_LLVM_IR) #include "flang-rt/runtime/environment.h" #include 
"environment-default-list.h" #include "memory.h" diff --git a/flang-rt/lib/runtime/file.cpp b/flang-rt/lib/runtime/file.cpp index d25a07e3c88ec..c3d9a5b8321a7 100644 --- a/flang-rt/lib/runtime/file.cpp +++ b/flang-rt/lib/runtime/file.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include "flang-rt/runtime/file.h" #include "flang-rt/runtime/memory.h" #include "flang-rt/runtime/tools.h" diff --git a/flang-rt/lib/runtime/io-api-minimal.cpp b/flang-rt/lib/runtime/io-api-minimal.cpp index 374be065df7cc..7077c20f41b4e 100644 --- a/flang-rt/lib/runtime/io-api-minimal.cpp +++ b/flang-rt/lib/runtime/io-api-minimal.cpp @@ -23,7 +23,7 @@ namespace Fortran::runtime::io { RT_EXT_API_GROUP_BEGIN #endif -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( @@ -47,7 +47,7 @@ inline RT_API_ATTRS bool FormattedScalarIntegerOutput( } } -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { return FormattedScalarIntegerOutput<1>(*cookie, n, "OutputInteger8"); } @@ -83,7 +83,7 @@ inline RT_API_ATTRS bool FormattedScalarRealOutput( } } -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputReal32)(Cookie cookie, float x) { return FormattedScalarRealOutput<4>(*cookie, x, "OutputReal32"); } @@ -116,7 +116,7 @@ inline RT_API_ATTRS bool FormattedScalarComplexOutput( return false; 
} -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) { return FormattedScalarComplexOutput<4>(*cookie, re, im, "OutputComplex32"); } diff --git a/flang-rt/lib/runtime/io-api.cpp b/flang-rt/lib/runtime/io-api.cpp index 94830257f0cdf..4eabde60a3a72 100644 --- a/flang-rt/lib/runtime/io-api.cpp +++ b/flang-rt/lib/runtime/io-api.cpp @@ -199,7 +199,7 @@ RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, } } -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Cookie IODEF(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp index 702829d0b6ccb..87681dc8a1b1e 100644 --- a/flang-rt/lib/runtime/stop.cpp +++ b/flang-rt/lib/runtime/stop.cpp @@ -30,7 +30,7 @@ extern "C" { [[maybe_unused]] static void DescribeIEEESignaledExceptions() { -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #ifdef fetestexcept // a macro in some environments; omit std:: auto excepts{fetestexcept(FE_ALL_EXCEPT)}; #else @@ -73,7 +73,7 @@ static void CloseAllExternalUnits(const char *why) { Fortran::runtime::io::ExternalFileUnit::CloseAll(handler); } -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)( int code, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -112,7 +112,7 @@ static void CloseAllExternalUnits(const char *why) { } #endif -#if not 
defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatementText)( const char *code, std::size_t length, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -228,8 +228,7 @@ static RT_NOINLINE_ATTR void PrintBacktrace() { #endif } - -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_OPTNONE_ATTR void RTNAME(Abort)() { #ifdef HAVE_BACKTRACE PrintBacktrace(); diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index 21e94f0048615..b357ca9d8fb3c 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -18,11 +18,11 @@ #define FORTRAN_PROCEDURE_NAME(name) name##_ -#if defined (_WIN32) || defined(__AMDGPU__) || defined(__NVPTX__) +#if defined (_WIN32) // UID and GID don't exist on Windows, these exist to avoid errors. 
typedef std::uint32_t uid_t; typedef std::uint32_t gid_t; -#elif defined(__AMDGPU__) || defined(__NVPTX__) +#elif (defined(__AMDGPU__) || defined(__NVPTX__)) && defined (EMBED_FLANG_RT_GPU_LLVM_IR) typedef std::uint32_t uid_t; typedef std::uint32_t gid_t; #else diff --git a/flang/include/flang/Runtime/main.h b/flang/include/flang/Runtime/main.h index 761841b738b45..696ce466e67fc 100644 --- a/flang/include/flang/Runtime/main.h +++ b/flang/include/flang/Runtime/main.h @@ -11,13 +11,13 @@ #include "flang/Runtime/c-or-cpp.h" #include "flang/Runtime/entry-names.h" -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include <thread> #endif struct EnvironmentDefaultList; -#if not defined(__AMDGPU__) && not defined(__NVPTX__) +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) std::thread::id RTNAME(GetMainThreadId)(); #endif From 9611bc7b5553c11820612f2ef7b2f7e4aff8e1f1 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 28 Oct 2025 08:17:22 -0500 Subject: [PATCH 5/6] Add option to not include FlangRT by default --- clang/include/clang/Driver/Options.td | 1 + clang/lib/Driver/ToolChains/Clang.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f6a434001152b..49ee8adc17112 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6132,6 +6132,7 @@ def : Flag<["-"], "nogpulib">, def : Flag<["-"], "nocudalib">, Alias; def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Link the LLVM C Library for GPUs">; +def nogpuflangrt : Flag<["-"], "nogpuflangrt">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def nogpulibc : Flag<["-"], "nogpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def
nodefaultlibs : Flag<["-"], "nodefaultlibs">, Visibility<[ClangOption, FlangOption]>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index efc9318498cea..2b411de8168c9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9454,7 +9454,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("--device-linker=" + TC.getTripleString() + "=" + "-lclang_rt.builtins")); bool HasFlangRT = HasCompilerRT && C.getDriver().IsFlangMode(); - if (HasFlangRT) + if (HasFlangRT && !Args.hasArg(options::OPT_nogpuflangrt)) CmdArgs.push_back( Args.MakeArgString("--device-linker=" + TC.getTripleString() + "=" + "-lflang_rt.runtime")); From eaf4ee85b645e61235fa2e529dc670d3ed0e2d14 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 29 Oct 2025 05:37:12 -0500 Subject: [PATCH 6/6] Fix libc compilation issue --- libc/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 14718e2090bde..c55b477949a78 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -69,6 +69,7 @@ if(NOT LIBC_NAMESPACE MATCHES "^__llvm_libc") message(FATAL_ERROR "Invalid LIBC_NAMESPACE. Must start with '__llvm_libc' was '${LIBC_NAMESPACE}'") endif() +string(REPLACE "." "_" LIBC_NAMESPACE "${LIBC_NAMESPACE}") message(STATUS "Setting LIBC_NAMESPACE namespace to '${LIBC_NAMESPACE}'") add_compile_definitions(LIBC_NAMESPACE=${LIBC_NAMESPACE})