Skip to content

Commit

Permalink
[ROCm] clang support
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruturaj4 committed Oct 2, 2024
1 parent 877928f commit 389c84c
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 32 deletions.
20 changes: 20 additions & 0 deletions build_tools/configure/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ class CudaCompiler(ArgparseableEnum):
CLANG = enum.auto()
NVCC = enum.auto()

class RocmCompiler(ArgparseableEnum):
  """Compilers that can be selected for ROCm device code."""

  # Currently the only member; value is assigned automatically.
  HIPCC = enum.auto()

class OS(ArgparseableEnum):
LINUX = enum.auto()
Expand Down Expand Up @@ -263,6 +265,9 @@ class XLAConfigOptions:
cuda_compiler: CudaCompiler
using_nccl: bool

# ROCM specific
rocm_compiler: RocmCompiler

def to_bazelrc_lines(
self,
dpav: DiscoverablePathsAndVersions,
Expand Down Expand Up @@ -352,6 +357,21 @@ def to_bazelrc_lines(
elif self.backend == Backend.ROCM:
build_and_test_tag_filters.append("-cuda-only")
build_and_test_tag_filters.append("-sycl-only")

compiler_pair = self.rocm_compiler, self.host_compiler

if compiler_pair == (RocmCompiler.HIPCC, HostCompiler.CLANG):
rc.append("build --config rocm")
# This is demanded by rocm_configure.bzl.
rc.append(
f"build --action_env CLANG_COMPILER_PATH={dpav.clang_path}"
)
elif compiler_pair == (RocmCompiler.HIPCC, HostCompiler.GCC):
rc.append("build --config rocm")
else:
raise NotImplementedError(
"ROCm clang with host compiler not supported"
)
elif self.backend == Backend.SYCL:
build_and_test_tag_filters.append("-cuda-only")
build_and_test_tag_filters.append("-rocm-only")
Expand Down
4 changes: 1 addition & 3 deletions third_party/tsl/third_party/gpus/crosstool/BUILD.rocm.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -82,19 +82,17 @@ cc_toolchain_config(
"-fdata-sections",
],
dbg_compile_flags = ["-g"],
cxx_flags = ["-std=c++14"],
cxx_flags = ["-std=c++17"],
link_flags = [
"-fuse-ld=gold",
"-Wl,-no-as-needed",
"-Wl,-z,relro,-z,now",
"-pass-exit-codes",
"-lstdc++",
"-lm",
],
link_libs = [],
opt_link_flags = [],
unfiltered_compile_flags = [
"-fno-canonical-system-headers",
"-Wno-builtin-macro-redefined",
"-D__DATE__=\"redacted\"",
"-D__TIMESTAMP__=\"redacted\"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@ import pipes

# Template values set by rocm_configure.bzl.
CPU_COMPILER = ('%{cpu_compiler}')
HOST_COMPILER_PATH = ('%{host_compiler_path}')

HIPCC_PATH = '%{hipcc_path}'
PREFIX_DIR = os.path.dirname(HOST_COMPILER_PATH)
HIPCC_ENV = '%{hipcc_env}'
HIP_RUNTIME_PATH = '%{hip_runtime_path}'
HIP_RUNTIME_LIBRARY = '%{hip_runtime_library}'
Expand Down Expand Up @@ -75,6 +77,7 @@ def GetHostCompilerOptions(argv):
parser.add_argument('--sysroot', nargs=1)
parser.add_argument('-g', nargs='*', action='append')
parser.add_argument('-fno-canonical-system-headers', action='store_true')
parser.add_argument('-no-canonical-prefixes', action='store_true')
parser.add_argument('--genco', action='store_true')

args, _ = parser.parse_known_args(argv)
Expand All @@ -87,7 +90,7 @@ def GetHostCompilerOptions(argv):
opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, []))
if args.g:
opts += ' -g' + ' -g'.join(sum(args.g, []))
if args.fno_canonical_system_headers:
if args.fno_canonical_system_headers or args.no_canonical_prefixes:
opts += ' -no-canonical-prefixes'
if args.sysroot:
opts += ' --sysroot ' + args.sysroot[0]
Expand Down Expand Up @@ -259,10 +262,11 @@ def main():
cpu_compiler_flags = [flag for flag in sys.argv[1:]
if not flag.startswith(('--rocm_log'))]

gpu_linker_flags = ["-lstdc++"]
# XXX: SE codes need to be built with gcc, but need this macro defined
cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__")
if VERBOSE: print(' '.join([CPU_COMPILER] + cpu_compiler_flags))
return subprocess.call([CPU_COMPILER] + cpu_compiler_flags)
if VERBOSE: print(' '.join([CPU_COMPILER] + cpu_compiler_flags + gpu_linker_flags))
return subprocess.call([CPU_COMPILER] + cpu_compiler_flags + gpu_linker_flags)

if __name__ == '__main__':
sys.exit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,6 @@ def _impl(ctx):
flag_group(
flags = [
"-no-canonical-prefixes",
"-fno-canonical-system-headers",
]
),
],
Expand Down
98 changes: 73 additions & 25 deletions third_party/tsl/third_party/gpus/rocm_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
`rocm_configure` depends on the following environment variables:
* `TF_NEED_ROCM`: Whether to enable building with ROCm.
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path
* `GCC_HOST_COMPILER_PATH`: The GCC host compiler path.
* `TF_ROCM_CLANG`: Whether to use clang for C++ and HIPCC for ROCm compilation.
* `TF_SYSROOT`: The sysroot to use when compiling.
* `CLANG_COMPILER_PATH`: The clang compiler path that will be used for
host code compilation if TF_ROCM_CLANG is 1.
* `ROCM_PATH`: The path to the ROCm toolkit. Default is `/opt/rocm`.
* `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets.
"""
Expand Down Expand Up @@ -39,6 +43,8 @@ load(

_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH"
_GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX"
_CLANG_COMPILER_PATH = "CLANG_COMPILER_PATH"
_TF_SYSROOT = "TF_SYSROOT"
_ROCM_TOOLKIT_PATH = "ROCM_PATH"
_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS"
_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO"
Expand Down Expand Up @@ -72,12 +78,15 @@ def verify_build_defines(params):
".",
)

def find_cc(repository_ctx):
def find_cc(repository_ctx, use_rocm_clang):
"""Find the C++ compiler."""

# Return a dummy value for GCC detection here to avoid error
target_cc_name = "gcc"
cc_path_envvar = _GCC_HOST_COMPILER_PATH
if use_rocm_clang:
target_cc_name = "clang"
cc_path_envvar = _CLANG_COMPILER_PATH
else:
target_cc_name = "gcc"
cc_path_envvar = _GCC_HOST_COMPILER_PATH
cc_name = target_cc_name

cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar)
Expand All @@ -99,24 +108,25 @@ def _cxx_inc_convert(path):
path = path.strip()
return path

def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot):
"""Compute the list of default C or C++ include directories."""
if lang_is_cpp:
lang = "c++"
else:
lang = "c"

sysroot = []
if tf_sysroot:
sysroot += ["--sysroot", tf_sysroot]
# TODO: We pass -no-canonical-prefixes here to match the compiler flags,
# but in the rocm_clang CROSSTOOL file that is a `feature`, and we should
# handle the case when it is disabled and no flag is passed.
result = raw_exec(repository_ctx, [
cc,
"-no-canonical-prefixes",
"-E",
"-x" + lang,
"-",
"-v",
])
] + sysroot)
stderr = err_out(result)
index1 = stderr.find(_INC_DIR_MARKER_BEGIN)
if index1 == -1:
Expand All @@ -138,14 +148,24 @@ def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
for p in inc_dirs.split("\n")
]

def get_cxx_inc_directories(repository_ctx, cc):
def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot):
"""Compute the list of default C and C++ include directories."""

# For some reason `clang -xc` sometimes returns include paths that are
# different from the ones from `clang -xc++`. (Symlink and a dir)
# So we run the compiler with both `-xc` and `-xc++` and merge resulting lists
includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True)
includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False)
includes_cpp = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
True,
tf_sysroot,
)
includes_c = _get_cxx_inc_directories_impl(
repository_ctx,
cc,
False,
tf_sysroot,
)

includes_cpp_set = depset(includes_cpp)
return includes_cpp + [
Expand Down Expand Up @@ -207,6 +227,7 @@ def _rocm_include_path(repository_ctx, rocm_config, bash_bin):
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/16.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17.0.0/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/17/include")
inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/17/include")
inc_dirs.append(rocm_toolkit_path + "/llvm/lib/clang/18/include")
if int(rocm_config.rocm_version_number) >= 60200:
inc_dirs.append(rocm_toolkit_path + "/lib/llvm/lib/clang/18/include")
Expand Down Expand Up @@ -539,6 +560,16 @@ def _genrule(src_dir, genrule_name, command, outs):
")\n"
)

def _flag_enabled(repository_ctx, flag_name):
    """Returns True only if `get_host_environ` yields "1" for `flag_name`."""
    flag_value = get_host_environ(repository_ctx, flag_name)
    return flag_value == "1"

def _use_rocm_clang(repository_ctx):
    """Returns True if `TF_ROCM_CLANG` is enabled, i.e. clang should be used for the host."""
    use_clang = _flag_enabled(repository_ctx, "TF_ROCM_CLANG")
    return use_clang

def _tf_sysroot(repository_ctx):
    """Looks up `TF_SYSROOT` via `get_host_environ`, passing "" as the fallback argument."""
    fallback = ""
    return get_host_environ(repository_ctx, _TF_SYSROOT, fallback)

def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets):
amdgpu_target_flags = ["--amdgpu-target=" +
amdgpu_target for amdgpu_target in amdgpu_targets]
Expand Down Expand Up @@ -674,6 +705,10 @@ def _create_local_rocm_repository(repository_ctx):
hiprand_include +
rocrand_include),
}

is_rocm_clang = _use_rocm_clang(repository_ctx)
tf_sysroot = _tf_sysroot(repository_ctx)

if rocm_libs["hipblaslt"] != None:
repository_dict["%{hipblaslt_lib}"] = rocm_libs["hipblaslt"].file_name

Expand All @@ -689,24 +724,36 @@ def _create_local_rocm_repository(repository_ctx):

# Set up crosstool/

cc = find_cc(repository_ctx)
cc = find_cc(repository_ctx, is_rocm_clang)
host_compiler_includes = get_cxx_inc_directories(
repository_ctx,
cc,
tf_sysroot,
)

host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc)

host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin")
# host_compiler_includes = get_cxx_inc_directories(repository_ctx, cc)

rocm_defines = {}

rocm_defines["%{builtin_sysroot}"] = tf_sysroot
rocm_defines["%{compiler}"] = "unknown"
if is_rocm_clang:
rocm_defines["%{compiler}"] = "clang"
host_compiler_prefix = get_host_environ(repository_ctx, _GCC_HOST_COMPILER_PREFIX, "/usr/bin")
rocm_defines["%{host_compiler_prefix}"] = host_compiler_prefix
rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + host_compiler_prefix
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = ""
rocm_defines["%{unfiltered_compile_flags}"] = ""
rocm_defines["%{rocm_hipcc_files}"] = "[]"

rocm_defines["%{linker_bin_path}"] = rocm_config.rocm_toolkit_path + "/hcc/compiler/bin"

# For gcc, do not canonicalize system header paths; some versions of gcc
# pick the shortest possible path for system includes when creating the
# .d file - given that includes that are prefixed with "../" multiple
# times quickly grow longer than the root of the tree, this can lead to
# bazel's header check failing.
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""
if is_rocm_clang:
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-no-canonical-prefixes\""
else:
# For gcc, do not canonicalize system header paths; some versions of gcc
# pick the shortest possible path for system includes when creating the
# .d file - given that includes that are prefixed with "../" multiple
# times quickly grow longer than the root of the tree, this can lead to
# bazel's header check failing.
rocm_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\""

rocm_defines["%{unfiltered_compile_flags}"] = to_list_of_strings([
"-DTENSORFLOW_USE_ROCM=1",
Expand Down Expand Up @@ -834,6 +881,7 @@ _ENVIRONS = [
_GCC_HOST_COMPILER_PATH,
_GCC_HOST_COMPILER_PREFIX,
"TF_NEED_ROCM",
"TF_ROCM_CLANG",
"TF_NEED_CUDA", # Needed by the `if_gpu_is_configured` macro
_ROCM_TOOLKIT_PATH,
_TF_ROCM_AMDGPU_TARGETS,
Expand Down

0 comments on commit 389c84c

Please sign in to comment.