diff --git a/build_tools/configure/configure.py b/build_tools/configure/configure.py index 508307bb89995..a6879521e7a3e 100755 --- a/build_tools/configure/configure.py +++ b/build_tools/configure/configure.py @@ -186,6 +186,8 @@ class CudaCompiler(ArgparseableEnum): CLANG = enum.auto() NVCC = enum.auto() +class RocmCompiler(ArgparseableEnum): + HIPCC = enum.auto() class OS(ArgparseableEnum): LINUX = enum.auto() @@ -263,6 +265,9 @@ class XLAConfigOptions: cuda_compiler: CudaCompiler using_nccl: bool + # ROCM specific + rocm_compiler: RocmCompiler + def to_bazelrc_lines( self, dpav: DiscoverablePathsAndVersions, @@ -352,6 +357,21 @@ def to_bazelrc_lines( elif self.backend == Backend.ROCM: build_and_test_tag_filters.append("-cuda-only") build_and_test_tag_filters.append("-sycl-only") + + compiler_pair = self.rocm_compiler, self.host_compiler + + if compiler_pair == (RocmCompiler.HIPCC, HostCompiler.CLANG): + rc.append("build --config rocm") + # This is demanded by rocm_configure.bzl. + rc.append( + f"build --action_env CLANG_COMPILER_PATH={dpav.clang_path}" + ) + elif compiler_pair == (RocmCompiler.HIPCC, HostCompiler.GCC): + rc.append("build --config rocm") + else: + raise NotImplementedError( + "ROCm clang with host compiler not supported" + ) elif self.backend == Backend.SYCL: build_and_test_tag_filters.append("-cuda-only") build_and_test_tag_filters.append("-rocm-only") diff --git a/third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl b/third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl index a742cfcd208ec..08038e59c597d 100644 --- a/third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl +++ b/third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl @@ -82,19 +82,17 @@ cc_toolchain_config( "-fdata-sections", ], dbg_compile_flags = ["-g"], - cxx_flags = ["-std=c++14"], + cxx_flags = ["-std=c++17"], link_flags = [ "-fuse-ld=gold", "-Wl,-no-as-needed", "-Wl,-z,relro,-z,now", - "-pass-exit-codes", "-lstdc++", "-lm", ], link_libs = [], opt_link_flags = [], unfiltered_compile_flags = [ - "-fno-canonical-system-headers", "-Wno-builtin-macro-redefined", "-D__DATE__=\"redacted\"", "-D__TIMESTAMP__=\"redacted\"", diff --git a/third_party/tsl/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/tsl/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl index d1d44f9fdb2ce..ac1d6eec9822b 100755 --- a/third_party/tsl/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl +++ b/third_party/tsl/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -24,8 +24,10 @@ import pipes # Template values set by rocm_configure.bzl. CPU_COMPILER = ('%{cpu_compiler}') +HOST_COMPILER_PATH = ('%{host_compiler_path}') HIPCC_PATH = '%{hipcc_path}' +PREFIX_DIR = os.path.dirname(HOST_COMPILER_PATH) HIPCC_ENV = '%{hipcc_env}' HIP_RUNTIME_PATH = '%{hip_runtime_path}' HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}' @@ -75,6 +77,7 @@ def GetHostCompilerOptions(argv): parser.add_argument('--sysroot', nargs=1) parser.add_argument('-g', nargs='*', action='append') parser.add_argument('-fno-canonical-system-headers', action='store_true') + parser.add_argument('-no-canonical-prefixes', action='store_true') parser.add_argument('--genco', action='store_true') args, _ = parser.parse_known_args(argv) @@ -87,7 +90,7 @@ def GetHostCompilerOptions(argv): opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) if args.g: opts += ' -g' + ' -g'.join(sum(args.g, [])) - if args.fno_canonical_system_headers: + if args.fno_canonical_system_headers or args.no_canonical_prefixes: opts += ' -no-canonical-prefixes' if args.sysroot: opts += ' --sysroot ' + args.sysroot[0] @@ -259,10 +262,11 @@ def main(): cpu_compiler_flags = [flag for flag in sys.argv[1:] if not flag.startswith(('--rocm_log'))] + gpu_linker_flags = ["-lstdc++"] # XXX: SE codes need to be built with gcc, but need this macro defined cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__") - if VERBOSE: print(' '.join([CPU_COMPILER] + cpu_compiler_flags)) - return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + if VERBOSE: print(' '.join([CPU_COMPILER] + cpu_compiler_flags + gpu_linker_flags)) + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags + gpu_linker_flags) if __name__ == '__main__': sys.exit(main()) diff --git a/third_party/tsl/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl b/third_party/tsl/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl index e0541defa3468..e5a942b66c17f 100644 --- a/third_party/tsl/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl +++ b/third_party/tsl/third_party/gpus/crosstool/hipcc_cc_toolchain_config.bzl.tpl @@ -1046,7 +1046,6 @@ def _impl(ctx): flag_group( flags = [ "-no-canonical-prefixes", - "-fno-canonical-system-headers", ] ), ], diff --git a/third_party/tsl/third_party/gpus/rocm_configure.bzl b/third_party/tsl/third_party/gpus/rocm_configure.bzl index 9206aea725d5b..4a80b2630db2c 100644 --- a/third_party/tsl/third_party/gpus/rocm_configure.bzl +++ b/third_party/tsl/third_party/gpus/rocm_configure.bzl @@ -3,7 +3,11 @@ `rocm_configure` depends on the following environment variables: * `TF_NEED_ROCM`: Whether to enable building with ROCm. - * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path + * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path. + * `TF_ROCM_CLANG`: Whether to use clang for C++ and HIPCC for ROCm compilation. + * `TF_SYSROOT`: The sysroot to use when compiling. + * `CLANG_COMPILER_PATH`: The clang compiler path that will be used for + host code compilation if TF_ROCM_CLANG is 1. * `ROCM_PATH`: The path to the ROCm toolkit. Default is `/opt/rocm`. * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. """ @@ -39,6 +43,8 @@ load( _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" +_CLANG_COMPILER_PATH = "CLANG_COMPILER_PATH" +_TF_SYSROOT = "TF_SYSROOT" _ROCM_TOOLKIT_PATH = "ROCM_PATH" _TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" _TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" @@ -72,12 +78,15 @@ def verify_build_defines(params): ".", ) -def find_cc(repository_ctx): +def find_cc(repository_ctx, use_rocm_clang): """Find the C++ compiler.""" - # Return a dummy value for GCC detection here to avoid error - target_cc_name = "gcc" - cc_path_envvar = _GCC_HOST_COMPILER_PATH + if use_rocm_clang: + target_cc_name = "clang" + cc_path_envvar = _CLANG_COMPILER_PATH + else: + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH cc_name = target_cc_name cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar) @@ -99,24 +108,25 @@ def _cxx_inc_convert(path): path = path.strip() return path -def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot): """Compute the list of default C or C++ include directories.""" if lang_is_cpp: lang = "c++" else: lang = "c" - + sysroot = [] + if tf_sysroot: + sysroot += ["--sysroot", tf_sysroot] # TODO: We pass -no-canonical-prefixes here to match the compiler flags, # but in rocm_clang CROSSTOOL file that is a `feature` and we should # handle the case when it's disabled and no flag is passed result = raw_exec(repository_ctx, [ cc, - "-no-canonical-prefixes", "-E", "-x" + lang, "-", "-v", - ]) + ] + sysroot) stderr = err_out(result) index1 = stderr.find(_INC_DIR_MARKER_BEGIN) if index1 == -1: @@ -138,14 +148,24 @@ def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): for p in inc_dirs.split("\n") ] -def get_cxx_inc_directories(repository_ctx, cc): +def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot): """Compute the list of default C and C++ include directories.""" # For some reason `clang -xc` sometimes returns include paths that are # different from the ones from `clang -xc++`. (Symlink and a dir) # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists - includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) - includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + includes_cpp = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + True, + tf_sysroot, + ) + includes_c = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + False, + tf_sysroot, + ) includes_cpp_set = depset(includes_cpp) return includes_cpp + [ @@ -207,6 +227,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin): inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/16.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17.0.0/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17/include") + inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/17/include") inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/18/include") if int(rocm_config.rocm_version_number) >= 60200: inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/18/include") @@ -539,6 +560,16 @@ def _genrule(src_dir, genrule_name, command, outs): ")\n" ) +def _flag_enabled(repository_ctx, flag_name): + return get_host_environ(repository_ctx, flag_name) == "1" + +def _use_rocm_clang(repository_ctx): + # Returns the flag if we need to use clang for the host. + return _flag_enabled(repository_ctx, "TF_ROCM_CLANG") + +def _tf_sysroot(repository_ctx): + return get_host_environ(repository_ctx, _TF_SYSROOT, "") + def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets): amdgpu_target_flags = ["--amdgpu-target=" + amdgpu_target for amdgpu_target in amdgpu_targets] @@ -674,6 +705,10 @@ def _create_local_rocm_repository(repository_ctx): hiprand_include + rocrand_include), } + + is_rocm_clang = _use_rocm_clang(repository_ctx) + tf_sysroot = _tf_sysroot(repository_ctx) + if rocm_libs["hipblaslt"] != None: repository_dict["%{hipblaslt_lib}"] = rocm_libs["hipblaslt"].file_name @@ -689,24 +724,36 @@ def _create_local_rocm_repository(repository_ctx): # Set up crosstool/ - cc = find_cc(repository_ctx) + cc = find_cc(repository_ctx, is_rocm_clang) + host_compiler_includes = get_cxx_inc_directories( + repository_ctx, + cc, + tf_sysroot, + ) - host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc) - - host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin") + # host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc) rocm_defines = {} - + rocm_defines["%{builtin_sysroot}"] = tf_sysroot + rocm_defines["%{compiler}"] = "unknown" + if is_rocm_clang: + rocm_defines["%{compiler}"] = "clang" + host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin") rocm_defines["%{host_compiler_prefix}"] = host_compiler_prefix + rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + host_compiler_prefix + rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "" + rocm_defines["%{unfiltered_compile_flags}"] = "" + rocm_defines["%{rocm_hipcc_files}"] = "[]" - rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + "/hcc/compiler/bin" - - # For gcc, do not canonicalize system header paths; some versions of gcc - # pick the shortest possible path for system includes when creating the - # .d file - given that includes that are prefixed with "../" multiple - # time quickly grow longer than the root of the tree, this can lead to - # bazel's header check failing. - rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\"" + if is_rocm_clang: + rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-no-canonical-prefixes\"" + else: + # For gcc, do not canonicalize system header paths; some versions of gcc + # pick the shortest possible path for system includes when creating the + # .d file - given that includes that are prefixed with "../" multiple + # time quickly grow longer than the root of the tree, this can lead to + # bazel's header check failing. + rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\"" rocm_defines["%{unfiltered_compile_flags}"] = to_list_of_strings([ "-DTENSORFLOW_USE_ROCM=1", @@ -834,6 +881,7 @@ _ENVIRONS = [ _GCC_HOST_COMPILER_PATH, _GCC_HOST_COMPILER_PREFIX, "TF_NEED_ROCM", + "TF_ROCM_CLANG", "TF_NEED_CUDA", # Needed by the `if_gpu_is_configured` macro _ROCM_TOOLKIT_PATH, _TF_ROCM_AMDGPU_TARGETS,